public void TestI_ULabelToR4LabelTransform()
        {
            // Trains the "lr" trainer on iris_binary.txt. The label is loaded as text (LabelText),
            // mapped to LabelU4 by TermTransform and then to Label by U2R4. The resulting
            // predictor is handed to TestTrainerHelper.FinalizeSerializationTest.
            var testName   = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var inputPath  = FileHelper.GetTestFile("iris_binary.txt");
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            var firstDump  = FileHelper.GetOutputFile("outData1.txt", testName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

            const string loaderSpec = "Text{col=LabelText:TX:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}";

            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var source = new MultiFileSource(inputPath);
                var data   = env.CreateLoader(loaderSpec, source);

                // Column flow: Slength,Swidth -> Features; LabelText -> LabelU4 -> Label.
                var view = env.CreateTransform("Concat{col=Features:Slength,Swidth}", data);
                view = env.CreateTransform("TermTransform{col=LabelU4:LabelText}", view);
                view = env.CreateTransform("U2R4{col=Label:LabelU4}", view);

                var examples = env.CreateExamples(view, "Features", "Label");
                var learner  = env.CreateTrainer("lr");

                using (var channel = env.Start("test"))
                {
                    var predictor = learner.Train(env, channel, examples);

                    // The prediction kind is read from the trainer itself rather than hard-coded.
                    TestTrainerHelper.FinalizeSerializationTest(
                        env, modelPath, predictor, examples, firstDump, secondDump,
                        learner.Trainer.PredictionKind, true, ratio: 0.8f);
                }
            }
        }
        /// <summary>
        /// Trains a "PrePost" trainer wrapping <paramref name="modelName"/> on mc_iris.txt
        /// (shuffled, with Features built from Slength and Swidth) and forwards the predictor
        /// to TestTrainerHelper.FinalizeSerializationTest as a multi-class model.
        /// </summary>
        /// <param name="modelName">Trainer settings placed after <c>p=</c> in the PrePost settings.</param>
        /// <param name="checkError">Passed through unchanged to FinalizeSerializationTest.</param>
        /// <param name="threads">Used in the output folder name; the environment is created
        /// with <c>conc: 1</c> when this equals 1 and <c>conc: 0</c> otherwise.</param>
        /// <param name="addpre">When true, <c>pre=poly{col=Features}</c> is added to the settings.</param>
        static void TrainPrePostProcessTrainer(string modelName, bool checkError, int threads, bool addpre)
        {
            var runId     = string.Format("{0}-{1}-T{2}", System.Reflection.MethodBase.GetCurrentMethod().Name, modelName, threads);
            var inputPath = FileHelper.GetTestFile("mc_iris.txt");

            // These two paths are never read below; the calls are kept as in the original
            // because GetOutputFile is opaque from here.
            var trainFile = FileHelper.GetOutputFile("iris_train.idv", runId);
            var testFile  = FileHelper.GetOutputFile("iris_test.idv", runId);

            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", runId);
            var firstDump  = FileHelper.GetOutputFile("outData1.txt", runId);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", runId);

            var concurrency = threads == 1 ? 1 : 0;

            using (var env = EnvHelper.NewTestEnvironment(conc: concurrency))
            {
                var source = new MultiFileSource(inputPath);
                var data   = env.CreateLoader(
                    "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                    source);

                // We shuffle because Iris is ordered by label.
                var view = env.CreateTransform("shuffle{force=+}", data);
                view = env.CreateTransform("concat{col=Features:Slength,Swidth}", view);
                var examples = env.CreateExamples(view, "Features", "Label");

                // Assemble the PrePost settings around the requested inner trainer.
                string head        = addpre ? "PrePost{pre=poly{col=Features} p=" : "PrePost{p=";
                string trainerSpec = head + modelName + " pret=Take{n=80}}";

                var learner = env.CreateTrainer(trainerSpec);
                using (var channel = env.Start("Train"))
                {
                    var model = learner.Train(env, channel, examples);
                    TestTrainerHelper.FinalizeSerializationTest(
                        env, modelPath, model, examples, firstDump, secondDump,
                        PredictionKind.MultiClassClassification, checkError, ratio: 0.15f);
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Trains a "knnmc" trainer on iris.txt (Features built from Slength and Swidth) and
        /// forwards the predictor to TestTrainerHelper.FinalizeSerializationTest as a
        /// multi-class model.
        /// </summary>
        /// <param name="k">Value of the <c>k=</c> option.</param>
        /// <param name="weight">Written as <c>weighting=distance</c> when equal to
        /// <c>NearestNeighborsWeights.distance</c>, otherwise as <c>weighting=uniform</c>.</param>
        /// <param name="threads">Value of the <c>nt=</c> option.</param>
        /// <param name="ratio">Passed through unchanged to FinalizeSerializationTest.</param>
        /// <param name="distance">Value of the <c>distance=</c> option.</param>
        public static void TrainkNNMultiClassification(int k, NearestNeighborsWeights weight, int threads, float ratio = 0.2f,
                                                       string distance = "L2")
        {
            var runId = string.Format("{0}-k{1}-W{2}-T{3}-D{4}",
                                      System.Reflection.MethodBase.GetCurrentMethod().Name,
                                      k, weight, threads, distance);
            var inputPath  = FileHelper.GetTestFile("iris.txt");
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", runId);
            var firstDump  = FileHelper.GetOutputFile("outData1.txt", runId);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", runId);

            // NOTE(review): the single-threaded environment is selected by k == 1, not by
            // threads == 1 -- confirm this is intended.
            using (var env = k == 1 ? EnvHelper.NewTestEnvironment(conc: 1) : EnvHelper.NewTestEnvironment())
            {
                var source = new MultiFileSource(inputPath);
                var data   = env.CreateLoader(
                    "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                    source);

                var withFeatures = env.CreateTransform("Concat{col=Features:Slength,Swidth}", data);
                var examples     = env.CreateExamples(withFeatures, "Features", "Label");

                string weighting;
                if (weight == NearestNeighborsWeights.distance)
                {
                    weighting = "distance";
                }
                else
                {
                    weighting = "uniform";
                }

                string trainerSpec = string.Format("knnmc{{k={0} weighting={1} nt={2} distance={3}}}",
                                                   k, weighting, threads, distance);
                var learner = env.CreateTrainer(trainerSpec);

                using (var channel = env.Start("test"))
                {
                    var predictor = learner.Train(env, channel, examples);
                    TestTrainerHelper.FinalizeSerializationTest(
                        env, modelPath, predictor, examples, firstDump, secondDump,
                        PredictionKind.MultiClassClassification, true, ratio: ratio);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Trains an "iovark" trainer wrapping "ftrank" on Train-28x28_small.txt and forwards
        /// the predictor to TestTrainerHelper.FinalizeSerializationTest as a multi-class model.
        /// </summary>
        /// <param name="singleColumn">Selects <c>sc=+</c> (true) or <c>sc=-</c> (false) in
        /// the trainer settings.</param>
        /// <param name="checkError">Passed through unchanged to FinalizeSerializationTest.</param>
        public static void TrainMultiToRankerPredictorSparse(bool singleColumn, bool checkError)
        {
            string columnTag = singleColumn ? "C" : "Vec";
            var    runId     = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                             "lr", columnTag);
            var trainPath  = FileHelper.GetTestFile("Train-28x28_small.txt");
            var testPath   = FileHelper.GetTestFile("Test-28x28_small.txt");
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", runId);
            var firstDump  = FileHelper.GetOutputFile("outData1.txt", runId);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", runId);

            // NOTE(review): the environment is not wrapped in a using statement; the original
            // had the using commented out, so this looks deliberate -- confirm.
            var env = EnvHelper.NewTestEnvironment();

            var data     = env.CreateLoader("Text", new MultiFileSource(trainPath));
            var examples = env.CreateExamples(data, "Features", "Label");

            string scFlag      = singleColumn ? "+" : "-";
            string trainerSpec = string.Format("iovark{{p=ftrank sc={0}}}", scFlag);

            // A loader over the test file is created as in the original, but nothing below reads it.
            data = env.CreateLoader("Text", new MultiFileSource(testPath));

            var learner = env.CreateTrainer(trainerSpec);
            using (var channel = env.Start("train"))
            {
                var model = learner.Train(env, channel, examples);
                TestTrainerHelper.FinalizeSerializationTest(
                    env, modelPath, model, examples, firstDump, secondDump,
                    PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Trains an "iova" trainer wrapping "lr" on Train-28x28_small.txt (Label in column 0,
        /// Features in columns 1-784), asserts that the training data is non-empty, and
        /// forwards the predictor to TestTrainerHelper.FinalizeSerializationTest as a
        /// multi-class model.
        /// </summary>
        /// <param name="singleColumn">Selects <c>sc=+</c> (true) or <c>sc=-</c> (false) in
        /// the trainer settings.</param>
        /// <param name="checkError">Passed through unchanged to FinalizeSerializationTest.</param>
        static void TrainMultiToBinaryPredictorSparse(bool singleColumn, bool checkError)
        {
            string columnTag = singleColumn ? "C" : "Vec";
            var    runId     = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                             "lr", columnTag);
            var trainPath  = FileHelper.GetTestFile("Train-28x28_small.txt");
            var testPath   = FileHelper.GetTestFile("Test-28x28_small.txt");
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", runId);
            var firstDump  = FileHelper.GetOutputFile("outData1.txt", runId);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", runId);

            // Same loader settings are used for the train and the test file.
            const string loaderSpec = "Text{col=Label:R4:0 col=Features:R4:1-784}";

            // NOTE(review): the environment is not wrapped in a using statement; the original
            // had the using commented out, so this looks deliberate -- confirm.
            var env = EnvHelper.NewTestEnvironment(conc: 1);

            var data     = env.CreateLoader(loaderSpec, new MultiFileSource(trainPath));
            var examples = env.CreateExamples(data, "Features", "Label");

            // Sanity check: the first dimension of the materialised data must be positive.
            var frame = DataFrameIO.ReadView(examples.Data);
            Assert.IsTrue(frame.Shape[0] > 0);

            string scFlag      = singleColumn ? "+" : "-";
            string trainerSpec = string.Format("iova{{p=lr sc={0} nt=1}}", scFlag);

            // A loader over the test file is created as in the original, but nothing below reads it.
            data = env.CreateLoader(loaderSpec, new MultiFileSource(testPath));

            var learner = env.CreateTrainer(trainerSpec);
            using (var channel = env.Start("train"))
            {
                var model = learner.Train(env, channel, examples);
                TestTrainerHelper.FinalizeSerializationTest(
                    env, modelPath, model, examples, firstDump, secondDump,
                    PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Trains an "iovark" trainer wrapping <paramref name="modelName"/> on mc_iris.txt or
        /// mc_iris_shift.txt (Features built from Slength and Swidth) and forwards the
        /// predictor to TestTrainerHelper.FinalizeSerializationTest as a multi-class model.
        /// </summary>
        /// <param name="modelName">Inner trainer name placed after <c>p=</c>. When it contains
        /// "xgbrk", <c>u4=+</c> is appended to the iovark settings.</param>
        /// <param name="threads">When positive, <c>{t=threads}</c> is appended to the inner
        /// trainer. The environment is created with <c>conc: 1</c> when this equals 1 and
        /// <c>conc: 0</c> otherwise.</param>
        /// <param name="checkError">Passed through unchanged to FinalizeSerializationTest.</param>
        /// <param name="singleColumn">Selects <c>sc=+</c> (true) or <c>sc=-</c> (false).</param>
        /// <param name="shift">Selects mc_iris_shift.txt (true) or mc_iris.txt (false).</param>
        /// <param name="useUint">Loads the Label column as <c>U4[0-2]</c> (true) or <c>R4</c> (false).</param>
        static void TrainMultiToRankerPredictorDense(string modelName, int threads, bool checkError,
                                                     bool singleColumn, bool shift, bool useUint)
        {
            string columnTag = singleColumn ? "C" : "Vec";
            string shiftTag  = shift ? "shift" : "std";
            var    runId     = string.Format("{0}-{1}-V{2}-T{3}-S{4}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                             modelName, columnTag, threads, shiftTag);

            var inputPath = shift
                ? FileHelper.GetTestFile("mc_iris_shift.txt")
                : FileHelper.GetTestFile("mc_iris.txt");

            // These two paths are never read below; the calls are kept as in the original
            // because GetOutputFile is opaque from here.
            var trainFile = FileHelper.GetOutputFile("iris_train.idv", runId);
            var testFile  = FileHelper.GetOutputFile("iris_test.idv", runId);

            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", runId);
            var firstDump  = FileHelper.GetOutputFile("outData1.txt", runId);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", runId);

            var concurrency = threads == 1 ? 1 : 0;

            // NOTE(review): the environment is not wrapped in a using statement; the original
            // had the using commented out, so this looks deliberate -- confirm.
            var env = EnvHelper.NewTestEnvironment(conc: concurrency);

            string labelKind  = useUint ? "U4[0-2]" : "R4";
            string loaderSpec = string.Format(
                "Text{{col=Label:{0}:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}}",
                labelKind);
            var data = env.CreateLoader(loaderSpec, new MultiFileSource(inputPath));

            var withFeatures = env.CreateTransform("Concat{col=Features:Slength,Swidth}", data);
            var examples     = env.CreateExamples(withFeatures, "Features", "Label");

            // Inner trainer: the bare name, or name{t=N} when a positive thread count is given.
            string innerSpec;
            if (threads <= 0)
            {
                innerSpec = modelName;
            }
            else
            {
                innerSpec = string.Format("{0}{{t={1}}}", modelName, threads);
            }

            string extraOption = modelName.Contains("xgbrk") ? " u4=+" : "";
            string scFlag      = singleColumn ? "+" : "-";
            string trainerSpec = string.Format("iovark{{p={0} sc={1}{2}}}", innerSpec, scFlag, extraOption);

            var learner = env.CreateTrainer(trainerSpec);
            using (var channel = env.Start("train"))
            {
                var model = learner.Train(env, channel, examples);
                TestTrainerHelper.FinalizeSerializationTest(
                    env, modelPath, model, examples, firstDump, secondDump,
                    PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Trains an "oova" trainer wrapping <paramref name="model"/> on one of the binary
        /// iris train files under types/ (Features built from Slength and Swidth) and forwards
        /// the predictor to TestTrainerHelper.FinalizeSerializationTest as a multi-class model.
        /// </summary>
        /// <param name="downsampling">Value of the <c>ds=</c> option. It is formatted with the
        /// invariant culture so the settings string always uses '.' as the decimal separator.</param>
        /// <param name="type">"R4", "U4" or "U43"; selects which pair of .idv files is used.</param>
        /// <param name="model">Inner trainer settings placed after <c>p=</c>.</param>
        /// <exception cref="NotSupportedException">Thrown when <paramref name="type"/> is not
        /// one of the three supported values.</exception>
        static void OptimizedOVA(float downsampling, string type, string model)
        {
            var    methodName = string.Format("{0}-D{1}-{2}-{3}", System.Reflection.MethodBase.GetCurrentMethod().Name, downsampling, type, model);
            string trainFile, testFile;

            switch (type)
            {
                case "R4":
                    trainFile = FileHelper.GetTestFile("types/iris_train.idv");
                    testFile  = FileHelper.GetTestFile("types/iris_test.idv");
                    break;
                case "U4":
                    trainFile = FileHelper.GetTestFile("types/iris_train_u4.idv");
                    testFile  = FileHelper.GetTestFile("types/iris_test_u4.idv");
                    break;
                case "U43":
                    trainFile = FileHelper.GetTestFile("types/iris_train_u43.idv");
                    testFile  = FileHelper.GetTestFile("types/iris_test_u43.idv");
                    break;
                default:
                    throw new NotSupportedException();
            }

            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            // The two writers are only needed to satisfy the out parameters of this overload.
            StringWriter sout, serr;

            // NOTE(review): the environment is not wrapped in a using statement; the original
            // had the using commented out, so this looks deliberate -- confirm.
            var env = EnvHelper.NewTestEnvironment(out sout, out serr, verbose: false);
            {
                var loaderSettings = "Binary";
                var loader         = env.CreateLoader(loaderSettings, new MultiFileSource(trainFile));
                var xf             = env.CreateTransform("concat{col=Features:Slength,Swidth}", loader);
                var roles          = env.CreateExamples(xf, "Features", "Label");

                // Format the float with the invariant culture: with the current culture a
                // comma-decimal locale would produce "ds=0,5" in the settings string.
                var trainerSettings = string.Format(System.Globalization.CultureInfo.InvariantCulture,
                                                    "oova{{p={1} ds={0}}}", downsampling, model);
                var trainer = env.CreateTrainer(trainerSettings);
                using (var ch = env.Start("Train"))
                {
                    var pred = trainer.Train(env, ch, roles);

                    // A loader over the test file is created as in the original, but nothing below reads it.
                    loader = env.CreateLoader(loaderSettings, new MultiFileSource(testFile));
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, pred, roles, outData, outData2,
                                                                PredictionKind.MulticlassClassification, true, ratio: 0.8f);
                }
            }
        }
        /// <summary>
        /// Trains "KMeansPlusPlus{k=5}" on iris_binary.txt after a ChainTrans transform that
        /// applies Scaler to Features twice, then forwards the predictor to
        /// TestTrainerHelper.FinalizeSerializationTest as a clustering model.
        /// </summary>
        public void TestChainTransformSerializeWithKMeans()
        {
            var testName   = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var inputPath  = FileHelper.GetTestFile("iris_binary.txt");
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            var firstDump  = FileHelper.GetOutputFile("outData1.txt", testName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

            const string loaderSpec = "Text{col=Label:R4:0 col=Features:R4:1-4 header=+}";
            const string chainSpec  = "ChainTrans{xf1=Scaler{col=Features} xf2=Scaler{col=Features}}";

            using (var env = EnvHelper.NewTestEnvironment())
            {
                var source = new MultiFileSource(inputPath);
                var data   = env.CreateLoader(loaderSpec, source);
                var scaled = env.CreateTransform(chainSpec, data);

                // Only the feature column is given a role; no label column is passed.
                var examples = env.CreateExamples(scaled, "Features");
                var learner  = env.CreateTrainer("KMeansPlusPlus{k=5}");

                using (var channel = env.Start("Train"))
                {
                    var predictor = learner.Train(env, channel, examples);
                    TestTrainerHelper.FinalizeSerializationTest(
                        env, modelPath, predictor, examples, firstDump, secondDump,
                        PredictionKind.Clustering, false);
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Trains an "oova" or "iova" trainer wrapping "ft" on one of the binary iris train
        /// files under types/ (Features built from Slength and Swidth) and forwards the
        /// predictor to TestTrainerHelper.FinalizeSerializationTest as a multi-class model.
        /// </summary>
        /// <param name="th">Thread count written into the <c>t=</c> option of "ft"; values of
        /// 0 or less use <c>t=1</c>. The environment is created with <c>conc: 1</c> when this
        /// equals 1 and <c>conc: 0</c> otherwise.</param>
        /// <param name="singleColumn">Selects <c>sc=+</c> or <c>sc=-</c> for "iova". The "oova"
        /// settings built here carry no <c>sc</c> option, so for "oova" it only affects the
        /// output folder name.</param>
        /// <param name="model">"ova" or "oova" (compared ignoring case) selects the "oova"
        /// trainer; any other value selects "iova".</param>
        /// <param name="type">"R4", "U4" or "U43"; selects which pair of .idv files is used.</param>
        /// <exception cref="NotSupportedException">Thrown when <paramref name="type"/> is not
        /// one of the three supported values.</exception>
        static void TrainMultiToBinaryPredictorIris(int th, bool singleColumn, string model, string type)
        {
            var    methodName = string.Format("{0}-T{1}-{2}-{3}-{4}", System.Reflection.MethodBase.GetCurrentMethod().Name, th, singleColumn ? "asvec" : "asR4", model, type);
            string trainFile, testFile;

            if (type == "R4")
            {
                trainFile = FileHelper.GetTestFile("types/iris_train.idv");
                testFile  = FileHelper.GetTestFile("types/iris_test.idv");
            }
            else if (type == "U4")
            {
                trainFile = FileHelper.GetTestFile("types/iris_train_u4.idv");
                testFile  = FileHelper.GetTestFile("types/iris_test_u4.idv");
            }
            else if (type == "U43")
            {
                trainFile = FileHelper.GetTestFile("types/iris_train_u43.idv");
                testFile  = FileHelper.GetTestFile("types/iris_test_u43.idv");
            }
            else
            {
                throw new NotSupportedException();
            }

            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var outData          = FileHelper.GetOutputFile("outData1.txt", methodName);
            var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

            // Ordinal, case-insensitive comparisons: ToLower() depends on the current culture
            // (for example "IOVA" does not lower-case to "iova" under Turkish casing rules).
            bool isOva  = string.Equals(model, "ova", StringComparison.OrdinalIgnoreCase)
                          || string.Equals(model, "oova", StringComparison.OrdinalIgnoreCase);
            bool isIova = string.Equals(model, "iova", StringComparison.OrdinalIgnoreCase);

            // NOTE(review): the environment is not wrapped in a using statement; the original
            // had the using commented out, so this looks deliberate -- confirm.
            var env = EnvHelper.NewTestEnvironment(conc: th == 1 ? 1 : 0);
            {
                var loaderSettings = "Binary";
                var loader         = env.CreateLoader(loaderSettings, new MultiFileSource(trainFile));
                var xf             = env.CreateTransform("concat{col=Features:Slength,Swidth}", loader);
                var roles          = env.CreateExamples(xf, "Features", "Label");

                // The settings strings below are kept character-for-character identical to
                // what the previous format calls produced, including spacing.
                string sc = singleColumn ? "+" : "-";
                string trainerSettings;
                if (isOva)
                {
                    trainerSettings = th > 0
                        ? string.Format("oova{{ p=ft{{t={0}}} }}", th)
                        : "oova{p=ft{t=1} }";
                }
                else
                {
                    trainerSettings = th > 0
                        ? string.Format("iova{{ p=ft{{t={0}}} sc={1} }}", th, sc)
                        : string.Format("iova{{p=ft{{t=1}} sc={0} }}", sc);
                }

                ITrainerExtended trainer = env.CreateTrainer(trainerSettings);

                using (var ch = env.Start("Train"))
                {
                    var pred = trainer.Train(env, ch, roles);

                    // A loader over the test file is created as in the original, but nothing below reads it.
                    loader = env.CreateLoader(loaderSettings, new MultiFileSource(testFile));

                    // The U4-typed data sets combined with "iova" use ratio 1; everything else uses 0.1.
                    float ratio = type.StartsWith("U4", StringComparison.Ordinal) && isIova ? 1f : 0.1f;
                    TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, pred, roles, outData, outData2,
                                                                PredictionKind.MulticlassClassification, true,
                                                                ratio: ratio);
                }
            }
        }