public void TestTagTrainOrScoreTransformCustomScorer()
        {
            // Trains a one-versus-all model (fast tree, 10 leaves, 1 iteration) on two
            // iris features via TagTrainScore with a custom multi-class scorer whose
            // output columns carry the "AA" suffix, then checks the scored schema.
            var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath     = FileHelper.GetTestFile("mc_iris.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);

            using (var env = EnvHelper.NewTestEnvironment())
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=-}",
                                              new MultiFileSource(dataFilePath));

                using (var pipe = new ScikitPipeline(new[] {
                    "Concat{col=Feature:Slength,Swidth}",
                    "TagTrainScore{tr=iova{p=ft{nl=10 iter=1}} lab=Label feat=Feature tag=model scorer=MultiClassClassifierScorer{ex=AA}}"
                }, host: env))
                {
                    pipe.Train(loader);
                    var pred = pipe.Predict(loader);
                    var df   = DataFrameIO.ReadView(pred);
                    // Assert.AreEqual takes (expected, actual); the original had them
                    // swapped, which produces a misleading failure message.
                    Assert.AreEqual(new Tuple <int, int>(150, 11), df.Shape);
                    var dfs = df.Head().ToString();
                    Assert.IsTrue(dfs.StartsWith("Label,Slength,Swidth,Plength,Pwidth,Feature.0,Feature.1,PredictedLabelAA,ScoreAA.0,ScoreAA.1,ScoreAA.2"));
                }
            }
        }
        public void TestI_ResampleSerialization()
        {
            // Runs the "resample" transform over the iris data set, writes the result
            // to CSV and sanity-checks the written file.
            var methodName         = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath       = FileHelper.GetTestFile("iris.txt");
            var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);

            // NOTE(review): the using-statement was deliberately commented out, so the
            // environment is never disposed here — confirm whether that is intentional.
            /*using (*/ var env = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 " +
                                              "col=Pwidth:R4:4 header=+ sep=tab}",
                                              new MultiFileSource(dataFilePath));
                var sorted = env.CreateTransform("resample{lambda=1 c=-}", loader);
                DataViewHelper.ToCsv(env, sorted, outputDataFilePath);

                // Skip any preamble: advance to the first line starting with "Label".
                var lines = File.ReadAllLines(outputDataFilePath);
                int begin = 0;
                for (; begin < lines.Length; ++begin)
                {
                    if (lines[begin].StartsWith("Label"))
                    {
                        break;
                    }
                }
                lines = lines.Skip(begin).ToArray();
                // NOTE(review): `linesSorted` is produced by OrderBy immediately before the
                // loop below, so the "is sorted" check can never fail. Presumably the
                // intent was to check the raw `lines` (or compare the two) — confirm.
                var linesSorted = lines.OrderBy(c => c).ToArray();
                for (int i = 1; i < linesSorted.Length; ++i)
                {
                    // Compares only the first character of adjacent lines.
                    if (linesSorted[i - 1][0] > linesSorted[i][0])
                    {
                        throw new Exception("The output is not sorted.");
                    }
                }
            }
        }
        public void TestI_DescribeTransformSaveDataAndZip()
        {
            // Checks that DescribeTransform produces identical predictions before and
            // after a save/reload round-trip of the model file.
            using (var env = EnvHelper.NewTestEnvironment())
            {
                var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;

                var sampleInputs = InputOutput.CreateInputs();
                var dataView     = env.CreateStreamingDataView(sampleInputs);
                var describeArgs = new DescribeTransform.Arguments()
                {
                    columns = new[] { "X" }
                };
                var describe = new DescribeTransform(env, describeArgs, dataView);

                // Predictions computed directly from the live transform.
                var directPredictionsFile = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
                StreamHelper.SavePredictions(env, describe, directPredictionsFile);
                Assert.IsTrue(File.Exists(directPredictionsFile));

                // Serialize the model to a zip ...
                var modelFile = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                StreamHelper.SaveModel(env, describe, modelFile);
                Assert.IsTrue(File.Exists(modelFile));

                // ... then compute predictions again from the persisted model.
                var reloadedPredictionsFile = FileHelper.GetOutputFile("outputDataFilePath2.txt", methodName);
                StreamHelper.SavePredictions(env, modelFile, reloadedPredictionsFile, dataView);
                Assert.IsTrue(File.Exists(reloadedPredictionsFile));

                // Both runs must produce the same non-empty output.
                var direct = File.ReadAllText(directPredictionsFile);
                Assert.IsTrue(direct.Length > 0);
                var reloaded = File.ReadAllText(reloadedPredictionsFile);
                Assert.AreEqual(direct, reloaded);
            }
        }
        public void TestChainTransformSerialize()
        {
            // Verifies that a ChainTrans (Scaler followed by Poly) placed after an
            // initial Scaler survives a serialization round-trip unchanged.
            using (var host = EnvHelper.NewTestEnvironment())
            {
                var examples = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 1, 10, 100 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3, 5 }
                    }
                };

                IDataView source = host.CreateStreamingDataView(examples);
                IDataTransform pipeline = host.CreateTransform("Scaler{col=X4:X}", source);
                pipeline = host.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", pipeline);

                // Output files live in a per-test folder under build/UnitTest.
                var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var modelFile  = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var dataFile   = FileHelper.GetOutputFile("outData.txt", methodName);
                var dataFile2  = FileHelper.GetOutputFile("outData2.txt", methodName);
                TestTransformHelper.SerializationTestTransform(host, modelFile, pipeline, source, dataFile, dataFile2);
            }
        }
        public void TestTagViewTransform()
        {
            // Checks serialization of the "tag" transform (in-memory tag) placed
            // after a Scaler.
            using (var host = EnvHelper.NewTestEnvironment())
            {
                var examples = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 0, 1 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3 }
                    }
                };

                IDataView source = host.CreateStreamingDataView(examples);
                var pipeline = host.CreateTransform("Scaler{col=X1:X}", source);
                pipeline = host.CreateTransform("tag{t=memory}", pipeline);

                // Output files live in a per-test folder under build/UnitTest.
                var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var modelFile  = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var dataFile   = FileHelper.GetOutputFile("outData.txt", methodName);
                var dataFile2  = FileHelper.GetOutputFile("outData2.txt", methodName);
                TestTransformHelper.SerializationTestTransform(host, modelFile, pipeline, source, dataFile, dataFile2);
            }
        }
// Esempio n. 6
// 0
        public void TestLambdaColumnPassThroughTransform()
        {
            // Adds a lambda-computed vector column "XX" (each component of "X" plus 1)
            // next to the original "X" column.
            using (var host = EnvHelper.NewTestEnvironment())
            {
                var inputs = new InputOutputU[] {
                    new InputOutputU()
                    {
                        X = new float[] { 0.1f, 1.1f }, Y = 0
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.2f, 1.2f }, Y = 1
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.3f, 1.3f }, Y = 2
                    }
                };

                var data       = host.CreateStreamingDataView(inputs);
                var lambdaView = LambdaColumnHelper.Create <VBuffer <float>, VBuffer <float> >(host,
                                                                                               "Lambda", data, "X", "XX", new VectorType(NumberType.R4, 2),
                                                                                               new VectorType(NumberType.R4, 2),
                                                                                               (in VBuffer <float> src, ref VBuffer <float> dst) =>
                {
                    dst           = new VBuffer <float>(2, new float[2]);
                    dst.Values[0] = src.Values[0] + 1f;
                    dst.Values[1] = src.Values[1] + 1f;
                });
                // NOTE(review): the original snippet was truncated at this point — the
                // closing braces and any assertions on lambdaView were lost. Braces are
                // restored so the method compiles; TODO recover the original checks.
                Assert.IsTrue(lambdaView != null);
            }
        }
        public void TestI_DescribeTransformCode()
        {
            // Applies DescribeTransform to column "X" of in-memory inputs and checks
            // that cursoring over the result yields exactly 4 rows.
            // NOTE(review): a second method with this exact name appears later in the
            // file (ported to a newer cursor API); two identical method names in the
            // same class will not compile — confirm which version should be kept.
            using (var env = EnvHelper.NewTestEnvironment())
            {
                var inputs = InputOutput.CreateInputs();
                var data   = env.CreateStreamingDataView(inputs);
                var args   = new DescribeTransform.Arguments()
                {
                    columns = new[] { "X" }
                };
                var tr = new DescribeTransform(env, args, data);

                var values = new List <int>();
                // Older cursor API: the predicate activates every column.
                using (var cursor = tr.GetRowCursor(i => true))
                {
                    // Getter for the column at index 1.
                    var columnGetter = cursor.GetGetter <int>(1);
                    while (cursor.MoveNext())
                    {
                        int got = 0;
                        columnGetter(ref got);
                        values.Add((int)got);
                    }
                }
                Assert.AreEqual(values.Count, 4);
            }
        }
        static void TrainPrePostProcessTrainer(string modelName, bool checkError, int threads, bool addpre)
        {
            // Trains a PrePost predictor (optionally with a polynomial pre-process and
            // a Take{n=80} pre-training step) on two iris features, then runs the
            // standard serialization test on the trained model.
            var methodName       = string.Format("{0}-{1}-T{2}", System.Reflection.MethodBase.GetCurrentMethod().Name, modelName, threads);
            var dataFilePath     = FileHelper.GetTestFile("mc_iris.txt");
            var trainFile        = FileHelper.GetOutputFile("iris_train.idv", methodName);
            var testFile         = FileHelper.GetOutputFile("iris_test.idv", methodName);
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            using (var env = EnvHelper.NewTestEnvironment(conc: threads == 1 ? 1 : 0))
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));
                // Iris is ordered by label, so force a shuffle before training.
                var pipeline = env.CreateTransform("shuffle{force=+}", loader);
                pipeline = env.CreateTransform("concat{col=Features:Slength,Swidth}", pipeline);
                var examples = env.CreateExamples(pipeline, "Features", "Label");

                var settings = string.Format(
                    addpre
                        ? "PrePost{{pre=poly{{col=Features}} p={0} pret=Take{{n=80}}}}"
                        : "PrePost{{p={0} pret=Take{{n=80}}}}",
                    modelName);
                var trainer = env.CreateTrainer(settings);
                using (var ch = env.Start("Train"))
                {
                    var predictor = trainer.Train(env, ch, examples);
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, predictor, examples, outData, outData2,
                                                                PredictionKind.MultiClassClassification, checkError, ratio: 0.15f);
                }
            }
        }
        public void TestI_ScalerTransformSerialize()
        {
            // Estimates a Scaler transform on in-memory data and verifies it
            // serializes and reloads with identical output.
            using (var host = EnvHelper.NewTestEnvironment())
            {
                var inputs = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 1, 10, 100 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3, 5 }
                    }
                };

                IDataView loader = host.CreateStreamingDataView(inputs);
                var       data   = host.CreateTransform("Scaler{col=X}", loader);
                // Direct cast instead of 'as': if the transform is not trainable this now
                // fails with an informative InvalidCastException rather than a
                // NullReferenceException.
                ((ITrainableTransform)data).Estimate();

                // We create a specific folder in build/UnitTest which will contain the output.
                var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var outData          = FileHelper.GetOutputFile("outData.txt", methodName);
                var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

                // Use the test framework's assertion instead of throwing a bare Exception.
                var nb = DataViewUtils.ComputeRowCount(data);
                Assert.IsTrue(nb >= 1, "empty view");

                // This function serializes the output data twice, once before saving the pipeline, once after loading the pipeline.
                // It checks it gives the same result.
                TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
            }
        }
// Esempio n. 10
// 0
        public void TestDataViewCacheDataFrameSerialization()
        {
            // Saves the "cachedf" transform into a model file, reloads the transforms
            // from that file and writes the reloaded view back out as text.
            var methodName         = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath       = FileHelper.GetTestFile("mc_iris.txt");
            var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
            var outModelFilePath   = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);

            // NOTE(review): the using-statement was deliberately commented out, so the
            // environment is never disposed here — confirm whether that is intentional.
            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));
                var sorted = env.CreateTransform("cachedf", loader);
                StreamHelper.SaveModel(env, sorted, outModelFilePath);

                // Reload the serialized transform chain and dump the selected columns.
                using (var fs = File.OpenRead(outModelFilePath))
                {
                    var deserializedData = ModelFileUtils.LoadTransforms(env, loader, fs);
                    var saver            = env.CreateSaver("Text");
                    using (var fs2 = File.Create(outputDataFilePath))
                        saver.SaveData(fs2, deserializedData,
                                       StreamHelper.GetColumnsIndex(deserializedData.Schema,
                                                                    new[] { "Label", "Slength", "Swidth", "Plength", "Pwidth" }));
                }
            }
        }
// Esempio n. 11
// 0
        public static void TrainkNNMultiClassification(int k, NearestNeighborsWeights weight, int threads, float ratio = 0.2f,
                                                       string distance = "L2")
        {
            // Trains a k-nearest-neighbors multi-class model on two iris features and
            // runs the standard serialization test on the trained predictor.
            var methodName       = string.Format("{0}-k{1}-W{2}-T{3}-D{4}", System.Reflection.MethodBase.GetCurrentMethod().Name, k, weight, threads, distance);
            var dataFilePath     = FileHelper.GetTestFile("iris.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            // A single-concurrency environment is used when k == 1.
            using (var env = k == 1 ? EnvHelper.NewTestEnvironment(conc: 1) : EnvHelper.NewTestEnvironment())
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));

                var featurized = env.CreateTransform("Concat{col=Features:Slength,Swidth}", loader);
                var examples   = env.CreateExamples(featurized, "Features", "Label");
                var weighting  = weight == NearestNeighborsWeights.distance ? "distance" : "uniform";
                var modelDef   = string.Format("knnmc{{k={0} weighting={1} nt={2} distance={3}}}", k, weighting, threads, distance);
                var trainer    = env.CreateTrainer(modelDef);
                using (var ch = env.Start("test"))
                {
                    var pred = trainer.Train(env, ch, examples);
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, pred, examples, outData, outData2,
                                                                PredictionKind.MultiClassClassification, true, ratio: ratio);
                }
            }
        }
// Esempio n. 12
// 0
        public void TestI_DescribeTransformCode()
        {
            // Applies DescribeTransform to column "X" of in-memory inputs and checks
            // that cursoring over the result yields exactly 4 rows.
            // NOTE(review): an earlier method in this file has the exact same name
            // (using an older cursor API); two identical method names in the same
            // class will not compile — confirm which version should be kept.
            // NOTE(review): the using-statement was deliberately commented out, so the
            // environment is never disposed here — confirm whether that is intentional.
            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var inputs = InputOutput.CreateInputs();
                var data   = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
                var args   = new DescribeTransform.Arguments()
                {
                    columns = new[] { "X" }
                };
                var tr = new DescribeTransform(env, args, data);

                var values = new List <int>();
                // Newer cursor API: pass the schema and a DataViewSchema.Column handle.
                using (var cursor = tr.GetRowCursor(tr.Schema))
                {
                    // Getter for the column at index 1.
                    var columnGetter = cursor.GetGetter <int>(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        int got = 0;
                        columnGetter(ref got);
                        values.Add((int)got);
                    }
                }
                Assert.AreEqual(values.Count, 4);
            }
        }
        public static void TrainMultiToRankerPredictorSparse(bool singleColumn, bool checkError)
        {
            // Trains an "iovark" (multi-class via ranker) predictor on the sparse
            // 28x28 digits sample and runs the standard serialization test.
            var methodName = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                           "lr", singleColumn ? "C" : "Vec");
            var trainFile        = FileHelper.GetTestFile("Train-28x28_small.txt");
            var testFile         = FileHelper.GetTestFile("Test-28x28_small.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            // NOTE(review): the using-statement was deliberately commented out, so the
            // environment is never disposed here — confirm whether that is intentional.
            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var loader = env.CreateLoader("Text", new MultiFileSource(trainFile));
                var roles  = env.CreateExamples(loader, "Features", "Label");
                var iova   = string.Format("iovark{{p=ftrank sc={0}}}", singleColumn ? "+" : "-");
                // NOTE(review): `loader` is reassigned to the test file but never read
                // afterwards — the test set appears to be loaded for nothing; confirm.
                loader = env.CreateLoader("Text", new MultiFileSource(testFile));
                var trainer = env.CreateTrainer(iova);
                using (var ch = env.Start("train"))
                {
                    var predictor = trainer.Train(env, ch, roles);
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, predictor, roles, outData, outData2,
                                                                PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
                }
            }
        }
        static void TrainMultiToBinaryPredictorSparse(bool singleColumn, bool checkError)
        {
            // Trains an "iova" (one-versus-all over logistic regression) predictor on
            // the sparse 28x28 digits sample and runs the standard serialization test.
            var methodName = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                           "lr", singleColumn ? "C" : "Vec");
            var trainFile        = FileHelper.GetTestFile("Train-28x28_small.txt");
            var testFile         = FileHelper.GetTestFile("Test-28x28_small.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            // NOTE(review): the using-statement was deliberately commented out, so the
            // environment is never disposed here — confirm whether that is intentional.
            /*using (*/
            var env = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Features:R4:1-784}", new MultiFileSource(trainFile));
                var roles  = env.CreateExamples(loader, "Features", "Label");
                // Sanity check: the training view must contain at least one row.
                var df     = DataFrameIO.ReadView(roles.Data);
                Assert.IsTrue(df.Shape[0] > 0);
                var iova = string.Format("iova{{p=lr sc={0} nt=1}}", singleColumn ? "+" : "-");
                // NOTE(review): `loader` is reassigned to the test file but never read
                // afterwards — the test set appears to be loaded for nothing; confirm.
                loader = env.CreateLoader("Text{col=Label:R4:0 col=Features:R4:1-784}", new MultiFileSource(testFile));
                var trainer = env.CreateTrainer(iova);
                using (var ch = env.Start("train"))
                {
                    var predictor = trainer.Train(env, ch, roles);
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, predictor, roles, outData, outData2,
                                                                PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
                }
            }
        }
        static void TrainMultiToRankerPredictorDense(string modelName, int threads, bool checkError,
                                                     bool singleColumn, bool shift, bool useUint)
        {
            // Trains an "iovark" predictor over a configurable base model on two iris
            // features (optionally with shifted labels and a U4 key label type) and
            // runs the standard serialization test.
            var methodName = string.Format("{0}-{1}-V{2}-T{3}-S{4}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                           modelName, singleColumn ? "C" : "Vec", threads, shift ? "shift" : "std");
            var dataFilePath = shift
                ? FileHelper.GetTestFile("mc_iris_shift.txt")
                : FileHelper.GetTestFile("mc_iris.txt");
            var trainFile        = FileHelper.GetOutputFile("iris_train.idv", methodName);
            var testFile         = FileHelper.GetOutputFile("iris_test.idv", methodName);
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            // NOTE(review): the using-statement was deliberately commented out, so the
            // environment is never disposed here — confirm whether that is intentional.
            /*using (*/
            var env = EnvHelper.NewTestEnvironment(conc: threads == 1 ? 1 : 0);
            {
                // Label column is loaded either as a key type U4[0-2] or as R4.
                string labelType    = useUint ? "U4[0-2]" : "R4";
                string loadSettings = string.Format("Text{{col=Label:{0}:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}}", labelType);
                var    loader       = env.CreateLoader(loadSettings, new MultiFileSource(dataFilePath));

                var    concat      = env.CreateTransform("Concat{col=Features:Slength,Swidth}", loader);
                var    roles       = env.CreateExamples(concat, "Features", "Label");
                // threads <= 0 means: use the base model name without a thread option.
                string modelDef    = threads <= 0 ? modelName : string.Format("{0}{{t={1}}}", modelName, threads);
                // xgbrk requires the extra "u4=+" option.
                string additionnal = modelName.Contains("xgbrk") ? " u4=+" : "";
                string iova        = string.Format("iovark{{p={0} sc={1}{2}}}", modelDef, singleColumn ? "+" : "-", additionnal);
                var    trainer     = env.CreateTrainer(iova);
                using (var ch = env.Start("train"))
                {
                    var predictor = trainer.Train(env, ch, roles);
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, predictor, roles, outData, outData2,
                                                                PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
                }
            }
        }
        public void TestI_PolynomialTransformSerialize()
        {
            // Round-trips a degree-3 polynomial feature transform through
            // serialization and checks the outputs match.
            using (var host = EnvHelper.NewTestEnvironment())
            {
                var examples = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 1, 10, 100 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3, 5 }
                    }
                };

                IDataView source = host.CreateStreamingDataView(examples);
                var transformed  = host.CreateTransform("poly{col=poly:X d=3}", source);

                // Output files live in a per-test folder under build/UnitTest.
                var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var modelFile  = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var dataFile   = FileHelper.GetOutputFile("outData.txt", methodName);
                var dataFile2  = FileHelper.GetOutputFile("outData2.txt", methodName);

                // Serializes the output twice — before saving the pipeline and after
                // reloading it — and checks both runs agree.
                TestTransformHelper.SerializationTestTransform(host, modelFile, transformed, source, dataFile, dataFile2);
            }
        }
        public void TestI_ULabelToR4LabelTransform()
        {
            // Maps a text label to a key type with TermTransform, converts that key to
            // R4 with U2R4, then trains a logistic regression on the result.
            var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath     = FileHelper.GetTestFile("iris_binary.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var loader = env.CreateLoader("Text{col=LabelText:TX:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));

                var pipeline = env.CreateTransform("Concat{col=Features:Slength,Swidth}", loader);
                pipeline = env.CreateTransform("TermTransform{col=LabelU4:LabelText}", pipeline);
                pipeline = env.CreateTransform("U2R4{col=Label:LabelU4}", pipeline);
                var examples = env.CreateExamples(pipeline, "Features", "Label");
                var trainer  = env.CreateTrainer("lr");
                using (var ch = env.Start("test"))
                {
                    var pred = trainer.Train(env, ch, examples);
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, pred, examples, outData, outData2,
                                                                trainer.Trainer.PredictionKind, true, ratio: 0.8f);
                }
            }
        }
        public void TestDataViewCacheDataFrameSerializationCacheFile()
        {
            // Serializes the "cachedf" transform configured with an on-disk cache file,
            // reloads it, re-saves the data and checks the cache file was created.
            var methodName         = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath       = FileHelper.GetTestFile("mc_iris.txt");
            var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
            var outModelFilePath   = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var cacheFile          = FileHelper.GetOutputFile("cacheFile.idv", methodName);

            using (var env = EnvHelper.NewTestEnvironment())
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));
                var sorted = env.CreateTransform(string.Format("cachedf{{r=+ df=- f={0}}}", cacheFile), loader);
                StreamHelper.SaveModel(env, sorted, outModelFilePath);

                // Reload the serialized transform chain and dump the selected columns.
                using (var fs = File.OpenRead(outModelFilePath))
                {
                    var deserializedData = env.LoadTransforms(fs, loader);
                    var saver            = env.CreateSaver("Text");
                    using (var fs2 = File.Create(outputDataFilePath))
                        saver.SaveData(fs2, deserializedData,
                                       StreamHelper.GetColumnsIndex(deserializedData.Schema, new[] { "Label", "Slength", "Swidth", "Plength", "Pwidth" }));
                }

                if (!File.Exists(cacheFile))
                {
                    // Pass the path as the fileName argument (second parameter); the
                    // original passed it as the message, producing a confusing error.
                    throw new FileNotFoundException("The cache file was not created.", cacheFile);
                }
            }
        }
// Esempio n. 19
// 0
        private void ScikitAPI_SimpleTransform_Load(bool removeFirstTransform)
        {
            // Trains a polynomial pipeline, checks its exact output, saves it, then
            // reloads it in a fresh environment and verifies the predictions match.
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var modelPath  = FileHelper.GetOutputFile($"model{removeFirstTransform}.zip", methodName);
            var trainRows  = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                }
            };

            var scoreRows = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                }
            };

            // Output of the first run, used as the reference for the reloaded pipeline.
            string expected = null;

            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var trainView = host.CreateStreamingDataView(trainRows);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
                {
                    var predictor = pipe.Train(trainView);
                    Assert.IsTrue(predictor != null);
                    var scoreView   = host.CreateStreamingDataView(scoreRows);
                    var predictions = pipe.Transform(scoreView);
                    var frame       = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(frame.Shape, new Tuple <int, int>(2, 9));
                    var flattened = frame.ToString().Replace("\n", ";");
                    expected = flattened;
                    Assert.AreEqual(flattened, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
                    pipe.Save(modelPath, removeFirstTransform);
                }
            }
            // Reload the saved pipeline in a brand-new environment and score again.
            using (var host = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var scoreView = host.CreateStreamingDataView(scoreRows);
                using (var reloaded = new ScikitPipeline(modelPath, host))
                {
                    var predictions = reloaded.Transform(scoreView);
                    var frame       = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(frame.Shape, new Tuple <int, int>(2, 9));
                    var flattened = frame.ToString().Replace("\n", ";");
                    Assert.AreEqual(expected, flattened);
                }
            }
        }
        public void TestLambdaColumnPassThroughTransform()
        {
            // Adds a lambda-computed vector column "XX" (each component of "X" plus 1)
            // next to the original "X" column, using the newer DataView type names.
            // NOTE(review): an earlier method in this file has the exact same name (on
            // the older API); two identical method names in one class will not compile.
            // NOTE(review): the using-statement was deliberately commented out, so the
            // environment is never disposed here — confirm whether that is intentional.
            /*using (*/
            var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new InputOutputU[] {
                    new InputOutputU()
                    {
                        X = new float[] { 0.1f, 1.1f }, Y = 0
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.2f, 1.2f }, Y = 1
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.3f, 1.3f }, Y = 2
                    }
                };

                var data       = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                var lambdaView = LambdaColumnHelper.Create <VBuffer <float>, VBuffer <float> >(host,
                                                                                               "Lambda", data, "X", "XX", new VectorDataViewType(NumberDataViewType.Single, 2),
                                                                                               new VectorDataViewType(NumberDataViewType.Single, 2),
                                                                                               (in VBuffer <float> src, ref VBuffer <float> dst) =>
                {
                    dst           = new VBuffer <float>(2, new float[2]);
                    dst.Values[0] = src.Values[0] + 1f;
                    dst.Values[1] = src.Values[1] + 1f;
                });
                // NOTE(review): the original snippet was truncated at this point — the
                // closing braces and any assertions on lambdaView were lost. Braces are
                // restored so the method compiles; TODO recover the original checks.
                Assert.IsTrue(lambdaView != null);
            }
        }
        /// <summary>
        /// Trains a sentiment classifier (SDCA) on the wikipedia-detox sample and
        /// returns a scorer transform built over the featurized training data.
        /// </summary>
        private IDataScorerTransform _TrainSentiment()
        {
            bool useL2Norm = true;

            // Loader: tab-separated file with a boolean label and the raw text.
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column    = new[] {
                    new TextLoader.Column("Label", DataKind.BL, 0),
                    new TextLoader.Column("SentimentText", DataKind.Text, 1)
                }
            };

            // Featurizer: lower-cased, no diacritics/punctuation, stop words removed,
            // char trigrams + word uni/bigrams, optionally L2-normalized.
            var featurizerArgs = new TextFeaturizingEstimator.Arguments()
            {
                Column = new TextFeaturizingEstimator.Column
                {
                    Name   = "Features",
                    Source = new[] { "SentimentText" }
                },
                KeepDiacritics               = false,
                KeepPunctuations             = false,
                TextCase                     = TextNormalizingEstimator.CaseNormalizationMode.Lower,
                OutputTokens                 = true,
                UsePredefinedStopWordRemover = true,
                VectorNormalizer             = useL2Norm ? TextFeaturizingEstimator.TextNormKind.L2 : TextFeaturizingEstimator.TextNormKind.None,
                CharFeatureExtractor         = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 3, AllLengths = false },
                WordFeatureExtractor         = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 2, AllLengths = true },
            };

            var dataPath = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

            using (var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: load then featurize the raw text.
                var loaded     = new TextLoader(env, loaderArgs).Read(new MultiFileSource(dataPath));
                var featurized = TextFeaturizingEstimator.Create(env, loaderArgs == null ? null : featurizerArgs, loaded);

                // Single-threaded trainer for reproducibility.
                var trainer = new SdcaBinaryTrainer(env, new SdcaBinaryTrainer.Arguments { NumThreads = 1 });

                // Cache the featurized data: the trainer makes several passes over it.
                var cached = new CacheDataView(env, featurized, prefetch: null);
                var model  = trainer.Fit(cached);

                var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                return ScoreUtils.GetScorer(model.Model, scoreRoles, env, trainRoles.Schema);
            }
        }
            /// <summary>
            /// Restores a legacy model from disk and builds a typed prediction engine.
            /// NOTE(review): the stream from File.OpenRead is never disposed — confirm intended.
            /// </summary>
            public PredictionEngineExample(string modelName)
            {
                _env = EnvHelper.NewTestEnvironment();
                var chain   = TransformerChain.LoadFromLegacy(_env, File.OpenRead(modelName));
                var catalog = new ModelOperationsCatalog(_env);
                _predictor  = catalog.CreatePredictionEngine <FloatVectorInput, FloatOutput>(chain);
            }
        /// <summary>
        /// End-to-end check of the DBScan transform: output schema, saved text
        /// output, and model (de)serialization round-trip.
        /// </summary>
        public void TestDBScanTransform()
        {
            var methodName         = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath       = FileHelper.GetTestFile("three_classes_2d.txt");
            var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
            var outModelFilePath   = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);

            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                //var loader = env.CreateLoader("text{col=RowId:I4:0 col=Features:R4:1-2 header=+}", new MultiFileSource(dataFilePath));
                var loaderArgs = new TextLoader.Arguments()
                {
                    HasHeader = true,
                    Column    = new[] { TextLoader.Column.Parse("RowId:R4:0"),
                                        TextLoader.Column.Parse("Features:R4:1-2") }
                };
                var loader = TextLoader.Create(env, loaderArgs, new MultiFileSource(dataFilePath));
                var xf     = env.CreateTransform("DBScan{col=Features}", loader);

                // The transform must expose both a cluster column and a score column.
                string schema = SchemaHelper.ToString(xf.Schema);
                if (string.IsNullOrEmpty(schema))
                    throw new Exception("Schema is null.");
                if (!schema.Contains("Cluster"))
                    throw new Exception("Schema does not contain Cluster.");
                if (!schema.Contains("Score"))
                    throw new Exception("Schema does not contain Score.");

                StreamHelper.SaveModel(env, xf, outModelFilePath);

                // Dump the flattened output (2 feature columns + cluster id + score).
                var saver = env.CreateSaver("Text{header=- schema=-}");
                using (var fs2 = File.Create(outputDataFilePath))
                    saver.SaveData(fs2, TestTransformHelper.AddFlatteningTransform(env, xf),
                                   StreamHelper.GetColumnsIndex(xf.Schema, new[] { "Features", "ClusterId", "Score" }));

                // Checking the values.
                var rows = File.ReadAllLines(outputDataFilePath)
                               .Select(c => c.Split('\t'))
                               .Where(c => c.Length == 4);
                if (!rows.Any())
                    throw new Exception(string.Format("The output file is empty or not containing three columns '{0}'", outputDataFilePath));
                // DBScan on three well-separated classes should find more than one cluster.
                var clusterIds = rows.Select(c => c[1]).Distinct();
                if (clusterIds.Count() <= 1)
                    throw new Exception("Only one cluster, this is unexpected.");

                // Serialization.
                var outData  = FileHelper.GetOutputFile("outData1.txt", methodName);
                var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
                TestTransformHelper.SerializationTestTransform(env, outModelFilePath, xf, loader, outData, outData2);
            }
        }
// Esempio n. 24
// 0  (separator artifact from the code-aggregation source; not C# code)
        /// <summary>
        /// Trains a small pipeline (polynomial features + KMeans with k=2) and
        /// checks the shape and leading values of the scored output.
        /// </summary>
        public void TestScikitAPI_SimplePredictor()
        {
            var trainRows = new[] {
                new ExampleA() { X = new float[] { 1, 10, 100 } },
                new ExampleA() { X = new float[] { 2, 3, 5 } },
                new ExampleA() { X = new float[] { 2, 4, 5 } },
                new ExampleA() { X = new float[] { 2, 4, 7 } },
            };

            var testRows = new[] {
                new ExampleA() { X = new float[] { -1, -10, -100 } },
                new ExampleA() { X = new float[] { -2, -3, -5 } },
                new ExampleA() { X = new float[] { 3, 4, 5 } },
                new ExampleA() { X = new float[] { 3, 4, 7 } },
            };

            /*using (*/
            // The using was deliberately commented out; the environment is not disposed here.
            var host = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var trainView = DataViewConstructionUtils.CreateFromEnumerable(host, trainRows);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
                {
                    var predictor = pipe.Train(trainView, feature: "X");
                    Assert.IsTrue(predictor != null);

                    var testView    = new StreamingDataFrame(DataViewConstructionUtils.CreateFromEnumerable(host, testRows));
                    var predictions = pipe.Predict(testView);
                    var df          = DataFrameIO.ReadView(predictions);
                    // 9 polynomial features + predicted label + 2 cluster scores = 12 columns.
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(4, 12));
                    var flattened = df.ToString().Replace("\n", ";");
                    Assert.IsTrue(flattened.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"));
                }
            }
        }
        /// <summary>
        /// Builds two pipeline branches with selecttag/append against a saved IDV
        /// and verifies the merged text output has the expected number of lines.
        /// </summary>
        public void TestSelectTagContactViewTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var firstData  = FileHelper.GetOutputFile("first.idv", methodName);
            var outData    = FileHelper.GetOutputFile("outData.txt", methodName);
            var outData2   = FileHelper.GetOutputFile("outData2.txt", methodName);

            using (var env = EnvHelper.NewTestEnvironment())
            {
                var rows = new[] {
                    new ExampleA() { X = new float[] { 0, 1, 4 } },
                    new ExampleA() { X = new float[] { 2, 3, 7 } }
                };

                // Create IDV
                IDataView loader = env.CreateStreamingDataView(rows);
                var       saver  = ComponentCreation.CreateSaver(env, "binary");
                using (var ch = env.Start("save"))
                {
                    using (var fs0 = env.CreateOutputFile(firstData))
                        DataSaverUtils.SaveDataView(ch, saver, loader, fs0, true);

                    // Create parallel pipeline
                    loader = env.CreateStreamingDataView(rows);
                    var data = env.CreateTransform("Scaler{col=X1:X}", loader);
                    data = env.CreateTransform(string.Format("selecttag{{t=first s=second f={0}}}", firstData), data);
                    data = env.CreateTransform("Scaler{col=X1:X}", data);
                    var merged = env.CreateTransform("append{t=first}", data);

                    // Save the outcome (every column of the merged view).
                    var text    = env.CreateSaver("Text");
                    var columns = Enumerable.Range(0, merged.Schema.Count).ToArray();
                    using (var fs2 = File.Create(outData))
                        text.SaveData(fs2, merged, columns);

                    // Final checking
                    var lines = File.ReadAllLines(outData);
                    if (lines.Length == 0)
                        throw new Exception("Empty file.");
                    if (lines.Length != 9)
                        throw new Exception("Some lines are missing.");
                }
            }
        }
// Esempio n. 26
// 0  (separator artifact from the code-aggregation source; not C# code)
        /// <summary>
        /// Trains a sentiment classifier (SDCA logistic regression) on the
        /// wikipedia-detox sample using the newer ML.NET API and returns a scorer
        /// transform over the featurized training data.
        /// </summary>
        private static IDataScorerTransform _TrainSentiment()
        {
            bool useL2Norm = true;

            // Loader: tab-separated file with a boolean label and the raw text.
            var loaderOptions = new TextLoader.Options()
            {
                Separators = new[] { '\t' },
                HasHeader  = true,
                Columns    = new[]
                {
                    new TextLoader.Column("Label", DataKind.Boolean, 0),
                    new TextLoader.Column("SentimentText", DataKind.String, 1)
                }
            };

            // Featurizer: lower-cased, no diacritics/punctuation, char trigrams +
            // word uni/bigrams, optionally L2-normalized.
            var featurizerOptions = new TextFeaturizingEstimator.Options()
            {
                KeepDiacritics         = false,
                KeepPunctuations       = false,
                CaseMode               = TextNormalizingEstimator.CaseMode.Lower,
                OutputTokensColumnName = "tokens",
                Norm = useL2Norm ? TextFeaturizingEstimator.NormFunction.L2 : TextFeaturizingEstimator.NormFunction.None,
                CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, UseAllLengths = false },
                WordFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 2, UseAllLengths = true },
            };

            var dataPath = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

            /*using (*/
            // The using was deliberately commented out; the environment is not disposed here.
            var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1);
            {
                // Pipeline: load then featurize the raw text.
                var loaded     = new TextLoader(env, loaderOptions).Load(new MultiFileSource(dataPath));
                var featurized = TextFeaturizingEstimator.Create(env, featurizerOptions, loaded);

                // Train on cached data: the trainer makes several passes over it.
                var trainer = new SdcaLogisticRegressionBinaryTrainer(env, new SdcaLogisticRegressionBinaryTrainer.Options
                {
                    LabelColumnName   = "Label",
                    FeatureColumnName = "Features"
                });
                var cached = new Microsoft.ML.Data.CacheDataView(env, featurized, prefetch: null);
                var model  = trainer.Fit(cached);

                var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                return ScoreUtils.GetScorer(model.Model, scoreRoles, env, trainRoles.Schema);
            }
        }
// Esempio n. 27
// 0  (separator artifact from the code-aggregation source; not C# code)
        /// <summary>
        /// Full tagged pipeline: split train/test, train a model on the train
        /// part, score the test part with it, then check the text output and the
        /// serialization round-trip.
        /// Fixes: removed an unused <c>columns</c> array (dead code) and disposed
        /// the row cursor, which was previously leaked.
        /// </summary>
        public void TrainTestPipelinePredictTransform()
        {
            var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath     = FileHelper.GetTestFile("mc_iris.txt");
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));

                var pipe = env.CreateTransform("Concat{col=Features:Slength,Swidth}", loader);
                pipe = env.CreateTransform("SplitTrainTest{col=base tag=train tag=test}", pipe);
                pipe = env.CreateTransform("SelectTag{tag=unused selectTag=train}", pipe);
                pipe = env.CreateTransform(string.Format("TagTrainScore{{tag=trainP out={0} tr=mlr}}", outModelFilePath), pipe);
                pipe = env.CreateTransform("SelectTag{tag=scoredTrain selectTag=test}", pipe);
                pipe = env.CreateTransform("TagPredict{in=trainP}", pipe);

                // The cursor must expose the same schema as the view itself.
                string schema = SchemaHelper.ToString(pipe.Schema);
                using (var cursor = pipe.GetRowCursor(i => true))
                {
                    string schema2 = SchemaHelper.ToString(cursor.Schema);
                    if (schema != schema2)
                    {
                        throw new Exception("Schema mismatch.");
                    }
                }
                // 49 rows are expected in the scored test part of the split.
                long count = DataViewUtils.ComputeRowCount(pipe);
                if (count != 49)
                {
                    throw new Exception(string.Format("Unexpected number of rows {0}", count));
                }

                // Checks the outputs.
                var saver = env.CreateSaver("Text");
                using (var fs2 = File.Create(outData))
                    saver.SaveData(fs2, pipe, StreamHelper.GetColumnsIndex(pipe.Schema));

                var lines = File.ReadAllLines(outData);
                if (lines.Length < 40)
                {
                    throw new Exception("Something is missing:" + string.Join("\n", lines));
                }
                if (lines.Length > 70)
                {
                    throw new Exception("Too much data:" + string.Join("\n", lines));
                }

                TestTransformHelper.SerializationTestTransform(env, outModelFilePath, pipe, loader, outData, outData2);
            }
        }
// Esempio n. 28
// 0  (separator artifact from the code-aggregation source; not C# code)
            /// <summary>
            /// Loads a pipeline with its predictor from disk and wraps it into a
            /// typed prediction engine, using an empty view only to carry the schema.
            /// NOTE(review): the stream from File.OpenRead is never disposed — confirm intended.
            /// </summary>
            public PredictionEngineExample(string modelName)
            {
                _env = EnvHelper.NewTestEnvironment();

                var schemaView = DataViewConstructionUtils.CreateFromEnumerable(_env, new FloatVectorInput[] { });
                var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(_env, File.OpenRead(modelName),
                                                                           new EmptyDataView(_env, schemaView.Schema));
                var wrapped = new TransformWrapper(_env, pipe);
                _predictor  = _env.CreatePredictionEngine <FloatVectorInput, FloatOutput>(wrapped);
            }
// Esempio n. 29
// 0  (separator artifact from the code-aggregation source; not C# code)
 /// <summary>
 /// Loads the model file and builds a prediction engine mapped to the
 /// "Probability" output column.
 /// Fix: the former <c>throw new Exception("erreur", e)</c> wrapped failures in a
 /// bare, context-free Exception; use a specific exception type with the model
 /// path. Callers catching <c>Exception</c> still work.
 /// NOTE(review): the environment is disposed as soon as the engine is built —
 /// confirm the engine does not keep a reference to it.
 /// </summary>
 public void Init(string modelName)
 {
     try
     {
         using (var env = EnvHelper.NewTestEnvironment())
             engine = new ValueMapperPredictionEngineFloat(env, modelName, "Probability");
     }
     catch (Exception e)
     {
         throw new InvalidOperationException(
             string.Format("Unable to initialize the prediction engine from '{0}'.", modelName), e);
     }
 }
        /// <summary>
        /// Splits the iris data into train/test files through
        /// SplitTrainTestTransform, reloads the serialized transform, dumps its
        /// output as text and checks that every expected file was produced.
        /// </summary>
        public void TestDataSplitTrainTestSerialization()
        {
            var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var dataFilePath     = FileHelper.GetTestFile("mc_iris.txt");
            var cacheFile        = FileHelper.GetOutputFile("outputDataFilePath.idv", methodName);
            var trainFile        = FileHelper.GetOutputFile("iris_train.idv", methodName);
            var testFile         = FileHelper.GetOutputFile("iris_test.idv", methodName);
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData.txt", methodName);

            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                              new MultiFileSource(dataFilePath));

                var splitArgs = new SplitTrainTestTransform.Arguments
                {
                    newColumn = "Part",
                    cacheFile = cacheFile,
                    filename  = new string[] { trainFile, testFile },
                    reuse     = true
                };

                var transformedData = new SplitTrainTestTransform(env, splitArgs, loader);
                StreamHelper.SaveModel(env, transformedData, outModelFilePath);

                // Reload the serialized transform and dump every column as text.
                using (var fs = File.OpenRead(outModelFilePath))
                {
                    var deserializedData = ModelFileUtils.LoadTransforms(env, loader, fs);
                    var saver            = env.CreateSaver("Text");
                    var columns          = Enumerable.Range(0, deserializedData.Schema.Count).ToArray();
                    using (var fs2 = File.Create(outData))
                        saver.SaveData(fs2, deserializedData, columns);
                }

                // The split must have produced the cache and both part files.
                foreach (var expectedFile in new[] { cacheFile, trainFile, testFile })
                {
                    if (!File.Exists(expectedFile))
                    {
                        throw new FileNotFoundException(expectedFile);
                    }
                }
            }
        }