static void TrainMultiToBinaryPredictorSparse(bool singleColumn, bool checkError)
        {
            var methodName = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                           "lr", singleColumn ? "C" : "Vec");
            var trainFile        = FileHelper.GetTestFile("Train-28x28_small.txt");
            var testFile         = FileHelper.GetTestFile("Test-28x28_small.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            /*using (*/
            var env = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Features:R4:1-784}", new MultiFileSource(trainFile));
                var roles  = env.CreateExamples(loader, "Features", "Label");
                var df     = DataFrameIO.ReadView(roles.Data);
                Assert.IsTrue(df.Shape[0] > 0);
                var iova = string.Format("iova{{p=lr sc={0} nt=1}}", singleColumn ? "+" : "-");
                loader = env.CreateLoader("Text{col=Label:R4:0 col=Features:R4:1-784}", new MultiFileSource(testFile));
                var trainer = env.CreateTrainer(iova);
                using (var ch = env.Start("train"))
                {
                    var predictor = trainer.Train(env, ch, roles);
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, predictor, roles, outData, outData2,
                                                                PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
                }
            }
        }
Esempio n. 2
0
            void FillCacheIfNotFilled()
            {
                lock (_lock)
                {
                    if (!(_autoView is null))
                    {
                        return;
                    }

                    _autoView = DataFrameIO.ReadView(_input, keepVectors: true, numThreads: _numThreads);

                    if (_sortColumn >= 0)
                    {
                        var    sortedPosition = new List <KeyValuePair <TValue, long> >();
                        long   position       = 0;
                        TValue got            = default(TValue);

                        // We could use multithreading here but the cost of sorting
                        // might be higher than going through an array in memory.
                        using (var cursor = _autoView.GetRowCursor(_autoView.Schema.Where(c => c.Index == _sortColumn)))
                        {
                            var sortColumnGetter = cursor.GetGetter <TValue>(
                                SchemaHelper._dc(_sortColumn, cursor));
                            while (cursor.MoveNext())
                            {
                                sortColumnGetter(ref got);
                                sortedPosition.Add(new KeyValuePair <TValue, long>(got, position));
                                ++position;
                            }
                        }
                        sortedPosition.Sort(CompareTo);
                        _autoView.Order(sortedPosition.Select(c => (int)c.Value).ToArray());
                    }
                }
            }
        public void TestTagTrainOrScoreTransformCustomScorer()
        {
            var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath     = FileHelper.GetTestFile("mc_iris.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);

            using (var env = EnvHelper.NewTestEnvironment())
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=-}",
                                              new MultiFileSource(dataFilePath));

                using (var pipe = new ScikitPipeline(new[] {
                    "Concat{col=Feature:Slength,Swidth}",
                    "TagTrainScore{tr=iova{p=ft{nl=10 iter=1}} lab=Label feat=Feature tag=model scorer=MultiClassClassifierScorer{ex=AA}}"
                }, host: env))
                {
                    pipe.Train(loader);
                    var pred = pipe.Predict(loader);
                    var df   = DataFrameIO.ReadView(pred);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(150, 11));
                    var dfs = df.Head().ToString();
                    Assert.IsTrue(dfs.StartsWith("Label,Slength,Swidth,Plength,Pwidth,Feature.0,Feature.1,PredictedLabelAA,ScoreAA.0,ScoreAA.1,ScoreAA.2"));
                }
            }
        }
Esempio n. 4
0
        public void TestEP_PassThroughTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var iris       = FileHelper.GetTestFile("iris.txt");
            var outPass    = FileHelper.GetOutputFile("data.idv", methodName);
            var df         = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });

            var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
            var learningPipeline = new GenericLearningPipeline(conc: 1);

            learningPipeline.Add(importData);
            learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.PassThrough()
            {
                Filename = outPass, SaveOnDisk = true
            });
            learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
            var predictor   = learningPipeline.Train();
            var predictions = predictor.Predict(df);
            var dfout       = DataFrameIO.ReadView(predictions);

            Assert.AreEqual(dfout.Shape, new Tuple <int, int>(150, 8));
            Assert.IsTrue(File.Exists(outPass));
        }
Esempio n. 5
0
        private void ScikitAPI_SimpleTransform_Load(bool removeFirstTransform)
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var output     = FileHelper.GetOutputFile($"model{removeFirstTransform}.zip", methodName);
            var inputs     = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                }
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                }
            };

            string expected = null;

            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var data = host.CreateStreamingDataView(inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
                {
                    var predictor = pipe.Train(data);
                    Assert.IsTrue(predictor != null);
                    var data2       = host.CreateStreamingDataView(inputs2);
                    var predictions = pipe.Transform(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(2, 9));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    expected = dfs2;
                    Assert.AreEqual(dfs2, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
                    pipe.Save(output, removeFirstTransform);
                }
            }
            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var data2 = host.CreateStreamingDataView(inputs2);
                using (var pipe2 = new ScikitPipeline(output, host))
                {
                    var predictions = pipe2.Transform(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(2, 9));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    Assert.AreEqual(expected, dfs2);
                }
            }
        }
Esempio n. 6
0
        public void TestScikitAPI_SimplePredictor()
        {
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 7 }
                },
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 7 }
                },
            };

            /*using (*/
            var host = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
                {
                    var predictor = pipe.Train(data, feature: "X");
                    Assert.IsTrue(predictor != null);
                    var data2       = new StreamingDataFrame(DataViewConstructionUtils.CreateFromEnumerable(host, inputs2));
                    var predictions = pipe.Predict(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(4, 12));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    Assert.IsTrue(dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"));
                }
            }
        }
 public void TestI_PolynomialTransformNumericValues()
 {
     using (var host = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
         raw.SetShuffle(false);
         var loader = host.CreateTransform("concat{col=X:A,B}", raw);
         var data   = host.CreateTransform("Poly{col=X}", loader);
         var res    = DataFrameIO.ReadView(data);
         var txt    = res.ToString();
         Assert.IsFalse(string.IsNullOrEmpty(txt));
         var exp   = "A,B,X.0,X.1,X.2,X.3,X.4\n1.0,2.0,1.0,2.0,1.0,2.0,4.0\n2.0,3.0,2.0,3.0,4.0,6.0,9.0\n10.0,11.0,10.0,11.0,100.0,110.0,121.0";
         var dfexp = DataFrameIO.ReadStr(exp);
         Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true));
     }
 }
 public void TestI_ScalerTransformNumericValuesMinMax()
 {
     using (var host = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
         raw.SetShuffle(false);
         var loader = host.CreateTransform("concat{col=X:A,B}", raw);
         var data   = host.CreateTransform("Scaler{col=X scale=minMax}", loader);
         (data as ITrainableTransform).Estimate();
         var res   = DataFrameIO.ReadView(data);
         var txt   = res.ToString();
         var exp   = "A,B,X.0,X.1\n1.0,2.0,0.0,0.0\n2.0,3.0,0.11111111,0.11111111\n10.0,11.0,1.0,1.0";
         var dfexp = DataFrameIO.ReadStr(exp);
         Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true));
     }
 }
Esempio n. 9
0
 public void TestEP_NearestNeighborsLPMc()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var iris             = FileHelper.GetTestFile("iris.txt");
         var df               = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
         var learningPipeline = new GenericLearningPipeline(conc: 1);
         learningPipeline.Add(importData);
         learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
         learningPipeline.Add(new Scikit.ML.EntryPoints.NearestNeighborsMultiClass());
         var predictor   = learningPipeline.Train();
         var predictions = predictor.Predict(df);
         var dfout       = DataFrameIO.ReadView(predictions);
         Assert.AreEqual(dfout.Shape, new Tuple <int, int>(150, 11));
     }
 }
        public void TestEP_ScalerTransform()
        {
            var iris = FileHelper.GetTestFile("iris.txt");
            var df   = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });

            var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
            var learningPipeline = new GenericLearningPipeline(conc: 1);

            learningPipeline.Add(importData);
            learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
            learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
            var predictor   = learningPipeline.Train();
            var predictions = predictor.Predict(df);
            var dfout       = DataFrameIO.ReadView(predictions);

            Assert.AreEqual(dfout.Shape, new Tuple <int, int>(150, 8));
        }
 public void TestI_ScalerTransformNumericValuesMeanVar()
 {
     /*using (*/
     var host = EnvHelper.NewTestEnvironment(conc: 1);
     {
         var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
         raw.SetShuffle(false);
         var loader = host.CreateTransform("concat{col=X:A,B}", raw);
         var data   = host.CreateTransform("Scaler{col=X}", loader);
         (data as ITrainableTransform).Estimate();
         var res = DataFrameIO.ReadView(data);
         var txt = res.ToString();
         Assert.IsNotNull(txt);
         var exp   = "A,B,X.0,X.1\n1.0,2.0,-0.827605963,-0.827605963\n2.0,3.0,-0.5793242,-0.5793242\n10.0,11.0,1.40693,1.40693";
         var dfexp = DataFrameIO.ReadStr(exp);
         Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true, sortBy: "A"));
     }
 }
Esempio n. 12
0
        public void TestScikitAPI_SimpleTransform()
        {
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                }
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                }
            };

            /*using (*/
            var host = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
                {
                    var predictor = pipe.Train(data);
                    Assert.IsTrue(predictor != null);
                    var data2       = DataViewConstructionUtils.CreateFromEnumerable(host, inputs2);
                    var predictions = pipe.Transform(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(2, 9));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    Assert.AreEqual(dfs2, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
                }
            }
        }
 public void TestTreePathInnerAPI()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
         var iris       = FileHelper.GetTestFile("iris.txt");
         var df         = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         using (var pipe = new ScikitPipeline(new[] { "Concat{col=Feature:Sepal_length,Sepal_width}",
                                                      "TreeFeat{tr=ft{iter=2} lab=Label feat=Feature}" }))
         {
             pipe.Train(df);
             var scorer = pipe.Predict(df);
             var dfout  = DataFrameIO.ReadView(scorer);
             Assert.AreEqual(dfout.Shape, new Tuple <int, int>(150, 31));
             var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);
             dfout.ToCsv(outfile);
             Assert.IsTrue(File.Exists(outfile));
         }
     }
 }
 public void TestTreePathNewAPI()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
         var iris             = FileHelper.GetTestFile("iris.txt");
         var df               = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
         var learningPipeline = new GenericLearningPipeline();
         learningPipeline.Add(importData);
         learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
         learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor()
         {
             MaxIterations = 2
         });
         var predictor   = learningPipeline.Train();
         var predictions = predictor.Predict(df);
         var dfout       = DataFrameIO.ReadView(predictions);
         Assert.AreEqual(dfout.Shape, new Tuple <int, int>(150, 8));
         var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);
         dfout.ToCsv(outfile);
         Assert.IsTrue(File.Exists(outfile));
     }
 }
        /// <summary>
        /// Trains the pipeline with data coming from a <see cref="IDataView"/>.
        /// </summary>
        public ScikitPipeline Train(IDataView data,
                                    string feature = "Feature", string label = null,
                                    string weight  = null, string groupId    = null)
        {
            IDataView trans = data;

            using (var ch = _env.Start("Create transforms"))
            {
                for (int i = 0; i < _transforms.Length; ++i)
                {
                    try
                    {
                        trans = _env.CreateTransform(_transforms[i].transformSettings, trans);
                    }
                    catch (Exception e)
                    {
                        if (e.ToString().Contains("Unknown loadable class"))
                        {
                            var nn   = _env.ComponentCatalog.GetAllClasses().Length;
                            var filt = _env.ComponentCatalog.GetAllClasses()
                                       .Select(c => c.UserName)
                                       .OrderBy(c => c)
                                       .Where(c => c.Trim().Length > 2);
                            var regis = string.Join("\n", filt);
                            throw Contracts.Except(e, $"Unable to create transform '{_transforms[i].transformSettings}', assembly not registered among {nn}\n{regis}");
                        }
                        throw e;
                    }
                    _transforms[i].transform = trans as IDataTransform;
                }
            }

            if (_predictor != null)
            {
                using (var ch = _env.Start("Create Predictor"))
                {
                    _predictor.trainer = TrainerHelper.CreateTrainer(_env, _predictor.trainerSettings);
                    _roles             = new List <KeyValuePair <RoleMappedSchema.ColumnRole, string> >();
                    _roles.Add(new KeyValuePair <RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, feature));
                    if (!string.IsNullOrEmpty(label))
                    {
                        _roles.Add(new KeyValuePair <RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Label, label));
                    }
                    if (!string.IsNullOrEmpty(groupId))
                    {
                        _roles.Add(new KeyValuePair <RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Group, groupId));
                    }
                    if (!string.IsNullOrEmpty(weight))
                    {
                        _roles.Add(new KeyValuePair <RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Weight, weight));
                    }
                    var roleMap = new RoleMappedData(trans, label, feature, group: groupId, weight: weight);
                    _predictor.predictor   = _predictor.trainer.Train(_env, ch, roleMap);
                    _predictor.roleMapData = roleMap;
                }
            }
            else
            {
                _predictor = new StepPredictor()
                {
                    predictor       = null,
                    trainer         = null,
                    trainerSettings = null,
                    roleMapData     = new RoleMappedData(trans)
                };

                // We predict one to make sure everything works fine.
                using (var ch = _env.Start("Compute one prediction."))
                {
                    var df = DataFrameIO.ReadView(trans, 1, keepVectors: true, env: _env);
                    if (df.Length == 0)
                    {
                        throw _env.ExceptEmpty("Something went wrong. The pipeline does not produce any output.");
                    }
                }
            }
            return(this);
        }
        /// <summary>
        /// Runs a simple test.
        /// </summary>
        public static void TestScikitAPI()
        {
            var inputs = new[] {
                new ExampleVector()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleVector()
                {
                    X = new float[] { 2, 3, 5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 2, 4, 5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 2, 4, 7 }
                },
            };

            var inputs2 = new[] {
                new ExampleVector()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleVector()
                {
                    X = new float[] { -2, -3, -5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 3, 4, 5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 3, 4, 7 }
                },
            };

            using (var host = new ConsoleEnvironment(conc: 1))
            {
                ComponentHelper.AddStandardComponents(host);
                var data = host.CreateStreamingDataView(inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
                {
                    var predictor = pipe.Train(data, feature: "X");
                    if (predictor == null)
                    {
                        throw new Exception("Test failed: no predictor.");
                    }
                    var data2       = host.CreateStreamingDataView(inputs2);
                    var predictions = pipe.Predict(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    if (df.Shape.Item1 != 4 || df.Shape.Item2 != 12)
                    {
                        throw new Exception("Test failed: prediction failed.");
                    }
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    if (!dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"))
                    {
                        throw new Exception("Test failed: prediction failed (header).");
                    }
                }
            }
        }
Esempio n. 17
0
        public void TestTimeSeriesFloatRegression()
        {
            var inputs = new[] {
                new InputOutput()
                {
                    X = 5f, time = 0f, one = 1f
                },
                new InputOutput()
                {
                    X = 7f, time = 1f, one = 1f
                },
                new InputOutput()
                {
                    X = 9f, time = 2f, one = 1f
                },
                new InputOutput()
                {
                    X = 11f, time = 3f, one = 1f
                },
                new InputOutput()
                {
                    X = 5f, time = 0f, one = 1f
                },
                new InputOutput()
                {
                    X = 7f, time = 1f, one = 1f
                },
                new InputOutput()
                {
                    X = 9f, time = 2f, one = 1f
                },
                new InputOutput()
                {
                    X = 11f, time = 3f, one = 1f
                },
                new InputOutput()
                {
                    X = 5f, time = 0f, one = 1f
                },
                new InputOutput()
                {
                    X = 7f, time = 1f, one = 1f
                },
                new InputOutput()
                {
                    X = 9f, time = 2f, one = 1f
                },
                new InputOutput()
                {
                    X = 11f, time = 3f, one = 1f
                },
            };

            using (var host = EnvHelper.NewTestEnvironment())
            {
                var data = host.CreateStreamingDataView(inputs);
                using (var pipe = new ScikitPipeline(new[] { "concat{col=xt:time,one}" }, "sasdcar{iter=50}", host))
                {
                    pipe.Train(data, feature: "xt", label: "X");
                    var view = pipe.Predict(data);
                    var df   = DataFrameIO.ReadView(view).Head(4).Copy();
                    df["diff"] = df["Score"] - df["X"];
                    var exp = DataFrameIO.ReadStr("null\n0\n0\n0\n0");
                    df["diff"].AssertAlmostEqual(exp["null"].AsType(NumberType.R4), precision: 1e-1);
                }
            }
        }
Esempio n. 18
0
        public void TestScikitAPI_SimplePredictor_FastValueMapper()
        {
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 7 }
                },
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 7 }
                },
            };
            DataFrame df1, df2, df3;

            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var data  = host.CreateStreamingDataView(inputs);
                var data2 = host.CreateStreamingDataView(inputs2);
                df1 = DataFrameIO.ReadView(data, env: host, keepVectors: true);
                df2 = DataFrameIO.ReadView(data2, env: host, keepVectors: true);
                df3 = DataFrameIO.ReadView(data2, env: host, keepVectors: true);
            }

            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
                {
                    DataFrame pred = null, pred2 = null;
                    var       predictor = pipe.Train(df1, feature: "X");
                    Assert.IsTrue(predictor != null);

                    pipe.Predict(df2, ref pred);
                    Assert.AreEqual(pred.Shape, new Tuple <int, int>(4, 3));
                    var dfs  = pred.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    if (!dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"))
                    {
                        throw new Exception($"Wrong starts\n{dfs2}");
                    }

                    pipe.Predict(df3, ref pred2);
                    pred.AssertAlmostEqual(pred2);
                }
            }
        }
Esempio n. 19
0
        private void ScikitAPI_SimplePredictor_Load(bool removeFirstTransform)
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var output     = FileHelper.GetOutputFile($"model{removeFirstTransform}.zip", methodName);
            var inputs     = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 7 }
                },
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 7 }
                },
            };

            string expected = null;

            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var data = host.CreateStreamingDataView(inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
                {
                    var predictor = pipe.Train(data, feature: "X");
                    Assert.IsTrue(predictor != null);
                    var data2       = host.CreateStreamingDataView(inputs2);
                    var predictions = pipe.Predict(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(4, 12));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    if (!dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"))
                    {
                        throw new Exception($"Wrong starts\n{dfs2}");
                    }
                    expected = dfs2;
                    pipe.Save(output, removeFirstTransform);
                }
            }
            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var data2 = host.CreateStreamingDataView(inputs2);
                using (var pipe2 = new ScikitPipeline(output, host))
                {
                    var predictions = pipe2.Predict(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(4, 12));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    Assert.AreEqual(expected, dfs2);
                }
            }
        }
Esempio n. 20
0
 /// <summary>
 /// Creates a <see cref="DataFrame"/> from a IDataView.
 /// </summary>
 public static DataFrame ReadView(IDataView view, int nrows = -1)
 {
     return(DataFrameIO.ReadView(view, nrows));
 }