// Trains the "lr" trainer on iris_binary.txt after passing the text label through
// TermTransform (LabelText -> LabelU4) and U2R4 (LabelU4 -> Label), then hands the
// predictor to TestTrainerHelper.FinalizeSerializationTest with ratio 0.8.
public void TestI_ULabelToR4LabelTransform()
{
    var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var inputPath = FileHelper.GetTestFile("iris_binary.txt");
    var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
    var firstOutput = FileHelper.GetOutputFile("outData1.txt", testName);
    var secondOutput = FileHelper.GetOutputFile("outData2.txt", testName);

    // Column 0 is read as text (LabelText); columns 1-4 are read as R4.
    const string loaderSettings = "Text{col=LabelText:TX:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}";

    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var source = env.CreateLoader(loaderSettings, new MultiFileSource(inputPath));
        var withFeatures = env.CreateTransform("Concat{col=Features:Slength,Swidth}", source);
        var withKeyLabel = env.CreateTransform("TermTransform{col=LabelU4:LabelText}", withFeatures);
        var withFloatLabel = env.CreateTransform("U2R4{col=Label:LabelU4}", withKeyLabel);
        var examples = env.CreateExamples(withFloatLabel, "Features", "Label");
        var trainer = env.CreateTrainer("lr");

        using (var channel = env.Start("test"))
        {
            var predictor = trainer.Train(env, channel, examples);
            var kind = trainer.Trainer.PredictionKind;
            TestTrainerHelper.FinalizeSerializationTest(env, modelPath, predictor, examples,
                                                        firstOutput, secondOutput, kind, true, ratio: 0.8f);
        }
    }
}
/// <summary>
/// Trains a "PrePost" trainer wrapping <paramref name="modelName"/> on a shuffled copy of
/// mc_iris.txt and runs the serialization check as a multi-class predictor (ratio 0.15).
/// </summary>
/// <param name="modelName">Settings of the inner predictor, inserted after <c>p=</c>.</param>
/// <param name="checkError">Forwarded to <c>FinalizeSerializationTest</c>.</param>
/// <param name="threads">When equal to 1 the environment is created with <c>conc: 1</c>, otherwise <c>conc: 0</c>.</param>
/// <param name="addpre">When true, a <c>pre=poly{col=Features}</c> step is added to the trainer settings.</param>
static void TrainPrePostProcessTrainer(string modelName, bool checkError, int threads, bool addpre)
{
    var methodName = string.Format("{0}-{1}-T{2}", System.Reflection.MethodBase.GetCurrentMethod().Name, modelName, threads);
    var irisPath = FileHelper.GetTestFile("mc_iris.txt");
    // These two paths are not used below; the calls are kept exactly as in the original.
    var trainFile = FileHelper.GetOutputFile("iris_train.idv", methodName);
    var testFile = FileHelper.GetOutputFile("iris_test.idv", methodName);
    var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var firstOutput = FileHelper.GetOutputFile("outData1.txt", methodName);
    var secondOutput = FileHelper.GetOutputFile("outData2.txt", methodName);

    int concurrency = 0;
    if (threads == 1)
    {
        concurrency = 1;
    }

    using (var env = EnvHelper.NewTestEnvironment(conc: concurrency))
    {
        var source = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                      new MultiFileSource(irisPath));
        // We shuffle because Iris is ordered by label.
        var shuffled = env.CreateTransform("shuffle{force=+}", source);
        var pipeline = env.CreateTransform("concat{col=Features:Slength,Swidth}", shuffled);
        var examples = env.CreateExamples(pipeline, "Features", "Label");

        // Build "PrePost{[pre=poly{col=Features} ]p=<modelName> pret=Take{n=80}}".
        string prefix;
        if (addpre)
        {
            prefix = "PrePost{pre=poly{col=Features} p=";
        }
        else
        {
            prefix = "PrePost{p=";
        }
        string trainerSettings = prefix + modelName + " pret=Take{n=80}}";

        var trainer = env.CreateTrainer(trainerSettings);
        using (var channel = env.Start("Train"))
        {
            var predictor = trainer.Train(env, channel, examples);
            TestTrainerHelper.FinalizeSerializationTest(env, modelPath, predictor, examples, firstOutput, secondOutput,
                                                        PredictionKind.MultiClassClassification, checkError, ratio: 0.15f);
        }
    }
}
/// <summary>
/// Trains a "knnmc" trainer on iris.txt (features: Slength, Swidth) and runs the
/// serialization check as a multi-class predictor.
/// </summary>
/// <param name="k">Value passed as <c>k=</c>; when it equals 1 the environment is created with <c>conc: 1</c>.</param>
/// <param name="weight">Mapped to <c>weighting=distance</c> for <c>NearestNeighborsWeights.distance</c>, otherwise <c>weighting=uniform</c>.</param>
/// <param name="threads">Value passed as <c>nt=</c>.</param>
/// <param name="ratio">Forwarded to <c>FinalizeSerializationTest</c>.</param>
/// <param name="distance">Value passed as <c>distance=</c>.</param>
public static void TrainkNNMultiClassification(int k, NearestNeighborsWeights weight, int threads, float ratio = 0.2f, string distance = "L2")
{
    var methodName = string.Format("{0}-k{1}-W{2}-T{3}-D{4}", System.Reflection.MethodBase.GetCurrentMethod().Name, k, weight, threads, distance);
    var irisPath = FileHelper.GetTestFile("iris.txt");
    var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var firstOutput = FileHelper.GetOutputFile("outData1.txt", methodName);
    var secondOutput = FileHelper.GetOutputFile("outData2.txt", methodName);

    using (var env = k == 1 ? EnvHelper.NewTestEnvironment(conc: 1) : EnvHelper.NewTestEnvironment())
    {
        var source = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
                                      new MultiFileSource(irisPath));
        var withFeatures = env.CreateTransform("Concat{col=Features:Slength,Swidth}", source);
        var examples = env.CreateExamples(withFeatures, "Features", "Label");

        string weighting;
        if (weight == NearestNeighborsWeights.distance)
        {
            weighting = "distance";
        }
        else
        {
            weighting = "uniform";
        }

        var trainerSettings = string.Format("knnmc{{k={0} weighting={1} nt={2} distance={3}}}", k, weighting, threads, distance);
        var trainer = env.CreateTrainer(trainerSettings);

        using (var channel = env.Start("test"))
        {
            var predictor = trainer.Train(env, channel, examples);
            TestTrainerHelper.FinalizeSerializationTest(env, modelPath, predictor, examples, firstOutput, secondOutput,
                                                        PredictionKind.MultiClassClassification, true, ratio: ratio);
        }
    }
}
/// <summary>
/// Trains an "iovark" trainer wrapping "ftrank" on Train-28x28_small.txt and runs the
/// serialization check as a multi-class predictor (ratio 0.1).
/// </summary>
/// <param name="singleColumn">Selects <c>sc=+</c> (true) or <c>sc=-</c> (false) in the trainer settings.</param>
/// <param name="checkError">Forwarded to <c>FinalizeSerializationTest</c>.</param>
public static void TrainMultiToRankerPredictorSparse(bool singleColumn, bool checkError)
{
    var methodName = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name, "lr", singleColumn ? "C" : "Vec");
    var trainPath = FileHelper.GetTestFile("Train-28x28_small.txt");
    var testPath = FileHelper.GetTestFile("Test-28x28_small.txt");
    var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var firstOutput = FileHelper.GetOutputFile("outData1.txt", methodName);
    var secondOutput = FileHelper.GetOutputFile("outData2.txt", methodName);

    // NOTE(review): the environment is not disposed here (the original "using" was
    // commented out) - confirm whether that is intentional before restoring it.
    var env = EnvHelper.NewTestEnvironment();

    var loader = env.CreateLoader("Text", new MultiFileSource(trainPath));
    var examples = env.CreateExamples(loader, "Features", "Label");

    string singleColumnFlag;
    if (singleColumn)
    {
        singleColumnFlag = "+";
    }
    else
    {
        singleColumnFlag = "-";
    }
    var trainerSettings = string.Format("iovark{{p=ftrank sc={0}}}", singleColumnFlag);

    // A loader on the test file is created as in the original; it is not used afterwards.
    loader = env.CreateLoader("Text", new MultiFileSource(testPath));
    var trainer = env.CreateTrainer(trainerSettings);

    using (var channel = env.Start("train"))
    {
        var predictor = trainer.Train(env, channel, examples);
        TestTrainerHelper.FinalizeSerializationTest(env, modelPath, predictor, examples, firstOutput, secondOutput,
                                                    PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
    }
}
/// <summary>
/// Trains an "iova" trainer wrapping "lr" on Train-28x28_small.txt (label in column 0,
/// features in columns 1-784) and runs the serialization check as a multi-class
/// predictor (ratio 0.1).
/// </summary>
/// <param name="singleColumn">Selects <c>sc=+</c> (true) or <c>sc=-</c> (false) in the trainer settings.</param>
/// <param name="checkError">Forwarded to <c>FinalizeSerializationTest</c>.</param>
static void TrainMultiToBinaryPredictorSparse(bool singleColumn, bool checkError)
{
    var methodName = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name, "lr", singleColumn ? "C" : "Vec");
    var trainPath = FileHelper.GetTestFile("Train-28x28_small.txt");
    var testPath = FileHelper.GetTestFile("Test-28x28_small.txt");
    var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var firstOutput = FileHelper.GetOutputFile("outData1.txt", methodName);
    var secondOutput = FileHelper.GetOutputFile("outData2.txt", methodName);

    const string loaderSettings = "Text{col=Label:R4:0 col=Features:R4:1-784}";

    // NOTE(review): the environment is not disposed here (the original "using" was
    // commented out) - confirm whether that is intentional before restoring it.
    var env = EnvHelper.NewTestEnvironment(conc: 1);

    var loader = env.CreateLoader(loaderSettings, new MultiFileSource(trainPath));
    var examples = env.CreateExamples(loader, "Features", "Label");

    // Sanity check: the training view must contain at least one row.
    var frame = DataFrameIO.ReadView(examples.Data);
    Assert.IsTrue(frame.Shape[0] > 0);

    string singleColumnFlag;
    if (singleColumn)
    {
        singleColumnFlag = "+";
    }
    else
    {
        singleColumnFlag = "-";
    }
    var trainerSettings = string.Format("iova{{p=lr sc={0} nt=1}}", singleColumnFlag);

    // A loader on the test file is created as in the original; it is not used afterwards.
    loader = env.CreateLoader(loaderSettings, new MultiFileSource(testPath));
    var trainer = env.CreateTrainer(trainerSettings);

    using (var channel = env.Start("train"))
    {
        var predictor = trainer.Train(env, channel, examples);
        TestTrainerHelper.FinalizeSerializationTest(env, modelPath, predictor, examples, firstOutput, secondOutput,
                                                    PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
    }
}
/// <summary>
/// Trains an "iovark" trainer wrapping <paramref name="modelName"/> on the iris data
/// (features: Slength, Swidth) and runs the serialization check as a multi-class
/// predictor (ratio 0.1).
/// </summary>
/// <param name="modelName">Inner predictor; when it contains "xgbrk", <c> u4=+</c> is appended to the settings.</param>
/// <param name="threads">When positive, appended to the inner predictor as <c>{t=threads}</c>; when equal to 1 the environment uses <c>conc: 1</c>, otherwise <c>conc: 0</c>.</param>
/// <param name="checkError">Forwarded to <c>FinalizeSerializationTest</c>.</param>
/// <param name="singleColumn">Selects <c>sc=+</c> (true) or <c>sc=-</c> (false).</param>
/// <param name="shift">Reads mc_iris_shift.txt when true, mc_iris.txt otherwise.</param>
/// <param name="useUint">Loads the label as <c>U4[0-2]</c> when true, <c>R4</c> otherwise.</param>
static void TrainMultiToRankerPredictorDense(string modelName, int threads, bool checkError, bool singleColumn, bool shift, bool useUint)
{
    var methodName = string.Format("{0}-{1}-V{2}-T{3}-S{4}", System.Reflection.MethodBase.GetCurrentMethod().Name, modelName,
                                   singleColumn ? "C" : "Vec", threads, shift ? "shift" : "std");
    var irisPath = FileHelper.GetTestFile(shift ? "mc_iris_shift.txt" : "mc_iris.txt");
    // These two paths are not used below; the calls are kept exactly as in the original.
    var trainFile = FileHelper.GetOutputFile("iris_train.idv", methodName);
    var testFile = FileHelper.GetOutputFile("iris_test.idv", methodName);
    var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var firstOutput = FileHelper.GetOutputFile("outData1.txt", methodName);
    var secondOutput = FileHelper.GetOutputFile("outData2.txt", methodName);

    int concurrency = 0;
    if (threads == 1)
    {
        concurrency = 1;
    }

    // NOTE(review): the environment is not disposed here (the original "using" was
    // commented out) - confirm whether that is intentional before restoring it.
    var env = EnvHelper.NewTestEnvironment(conc: concurrency);

    string labelType = "R4";
    if (useUint)
    {
        labelType = "U4[0-2]";
    }
    string loaderSettings = string.Format("Text{{col=Label:{0}:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}}", labelType);

    var source = env.CreateLoader(loaderSettings, new MultiFileSource(irisPath));
    var withFeatures = env.CreateTransform("Concat{col=Features:Slength,Swidth}", source);
    var examples = env.CreateExamples(withFeatures, "Features", "Label");

    // Inner predictor settings: the thread count is only appended when it is positive.
    string innerModel;
    if (threads <= 0)
    {
        innerModel = modelName;
    }
    else
    {
        innerModel = string.Format("{0}{{t={1}}}", modelName, threads);
    }

    string extraOptions = string.Empty;
    if (modelName.Contains("xgbrk"))
    {
        extraOptions = " u4=+";
    }

    string trainerSettings = string.Format("iovark{{p={0} sc={1}{2}}}", innerModel, singleColumn ? "+" : "-", extraOptions);
    var trainer = env.CreateTrainer(trainerSettings);

    using (var channel = env.Start("train"))
    {
        var predictor = trainer.Train(env, channel, examples);
        TestTrainerHelper.FinalizeSerializationTest(env, modelPath, predictor, examples, firstOutput, secondOutput,
                                                    PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
    }
}
/// <summary>
/// Trains an "oova" trainer wrapping <paramref name="model"/> on one of the binary iris
/// training files (features: Slength, Swidth) and runs the serialization check as a
/// multi-class predictor (ratio 0.8).
/// </summary>
/// <param name="downsampling">Value passed as <c>ds=</c> in the trainer settings.</param>
/// <param name="type">Selects the data files: "R4", "U4" or "U43".</param>
/// <param name="model">Inner predictor passed as <c>p=</c>.</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="type"/> is not one of the supported values.</exception>
static void OptimizedOVA(float downsampling, string type, string model)
{
    // The method name only feeds output paths, so it keeps the original formatting.
    var methodName = string.Format("{0}-D{1}-{2}-{3}", System.Reflection.MethodBase.GetCurrentMethod().Name, downsampling, type, model);

    string trainFile, testFile;
    switch (type)
    {
        case "R4":
            trainFile = FileHelper.GetTestFile("types/iris_train.idv");
            testFile = FileHelper.GetTestFile("types/iris_test.idv");
            break;
        case "U4":
            trainFile = FileHelper.GetTestFile("types/iris_train_u4.idv");
            testFile = FileHelper.GetTestFile("types/iris_test_u4.idv");
            break;
        case "U43":
            trainFile = FileHelper.GetTestFile("types/iris_train_u43.idv");
            testFile = FileHelper.GetTestFile("types/iris_test_u43.idv");
            break;
        default:
            throw new NotSupportedException();
    }

    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

    StringWriter sout, serr;
    // NOTE(review): the environment is not disposed here (the original "using" was
    // commented out) - confirm whether that is intentional before restoring it.
    var env = EnvHelper.NewTestEnvironment(out sout, out serr, verbose: false);

    var loaderSettings = "Binary";
    var loader = env.CreateLoader(loaderSettings, new MultiFileSource(trainFile));
    var xf = env.CreateTransform("concat{col=Features:Slength,Swidth}", loader);
    var roles = env.CreateExamples(xf, "Features", "Label");

    // The settings string is machine-readable: format the float with the invariant
    // culture so that a comma-decimal culture cannot produce "ds=0,5".
    var trainerSettings = string.Format(System.Globalization.CultureInfo.InvariantCulture,
                                        "oova{{p={1} ds={0}}}", downsampling, model);
    var trainer = env.CreateTrainer(trainerSettings);

    using (var ch = env.Start("Train"))
    {
        var pred = trainer.Train(env, ch, roles);

        // Captured output/error text of the environment; not asserted on, kept for
        // inspection while debugging.
        var sbout = sout.GetStringBuilder().ToString();
        var sbrr = serr.GetStringBuilder().ToString();

        // A loader on the test file is created as in the original; it is not used afterwards.
        loader = env.CreateLoader(loaderSettings, new MultiFileSource(testFile));
        TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, pred, roles, outData, outData2,
                                                    PredictionKind.MulticlassClassification, true, ratio: 0.8f);
    }
}
// Trains "KMeansPlusPlus{k=5}" on iris_binary.txt after a ChainTrans transform made of
// two Scaler steps on Features, then hands the predictor to
// TestTrainerHelper.FinalizeSerializationTest as a clustering predictor.
public void TestChainTransformSerializeWithKMeans()
{
    var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var inputPath = FileHelper.GetTestFile("iris_binary.txt");
    var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
    var firstOutput = FileHelper.GetOutputFile("outData1.txt", testName);
    var secondOutput = FileHelper.GetOutputFile("outData2.txt", testName);

    const string loaderSettings = "Text{col=Label:R4:0 col=Features:R4:1-4 header=+}";
    const string chainSettings = "ChainTrans{xf1=Scaler{col=Features} xf2=Scaler{col=Features}}";

    using (var env = EnvHelper.NewTestEnvironment())
    {
        var source = env.CreateLoader(loaderSettings, new MultiFileSource(inputPath));
        var scaled = env.CreateTransform(chainSettings, source);
        // Only the feature role is declared; no label column is given to the examples.
        var examples = env.CreateExamples(scaled, "Features");
        var trainer = env.CreateTrainer("KMeansPlusPlus{k=5}");

        using (var channel = env.Start("Train"))
        {
            var predictor = trainer.Train(env, channel, examples);
            TestTrainerHelper.FinalizeSerializationTest(env, modelPath, predictor, examples,
                                                        firstOutput, secondOutput, PredictionKind.Clustering, false);
        }
    }
}
/// <summary>
/// Trains an "oova" or "iova" trainer wrapping "ft" on one of the binary iris training
/// files (features: Slength, Swidth) and runs the serialization check as a multi-class
/// predictor.
/// </summary>
/// <param name="th">Thread count passed as <c>t=</c> when positive (otherwise <c>t=1</c>); when equal to 1 the environment uses <c>conc: 1</c>, otherwise <c>conc: 0</c>.</param>
/// <param name="singleColumn">Selects <c>sc=+</c> (true) or <c>sc=-</c> (false); only used by the "iova" settings.</param>
/// <param name="model">"ova" or "oova" (case-insensitive) selects the "oova" trainer; any other value selects "iova".</param>
/// <param name="type">Selects the data files: "R4", "U4" or "U43".</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="type"/> is not one of the supported values.</exception>
static void TrainMultiToBinaryPredictorIris(int th, bool singleColumn, string model, string type)
{
    var methodName = string.Format("{0}-T{1}-{2}-{3}-{4}", System.Reflection.MethodBase.GetCurrentMethod().Name, th,
                                   singleColumn ? "asvec" : "asR4", model, type);

    string trainFile, testFile;
    if (type == "R4")
    {
        trainFile = FileHelper.GetTestFile("types/iris_train.idv");
        testFile = FileHelper.GetTestFile("types/iris_test.idv");
    }
    else if (type == "U4")
    {
        trainFile = FileHelper.GetTestFile("types/iris_train_u4.idv");
        testFile = FileHelper.GetTestFile("types/iris_test_u4.idv");
    }
    else if (type == "U43")
    {
        trainFile = FileHelper.GetTestFile("types/iris_train_u43.idv");
        testFile = FileHelper.GetTestFile("types/iris_test_u43.idv");
    }
    else
    {
        throw new NotSupportedException();
    }

    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

    // NOTE(review): the environment is not disposed here (the original "using" was
    // commented out) - confirm whether that is intentional before restoring it.
    var env = EnvHelper.NewTestEnvironment(conc: th == 1 ? 1 : 0);

    var loaderSettings = "Binary";
    var loader = env.CreateLoader(loaderSettings, new MultiFileSource(trainFile));
    var xf = env.CreateTransform("concat{col=Features:Slength,Swidth}", loader);
    var roles = env.CreateExamples(xf, "Features", "Label");

    // Ordinal, case-insensitive comparison: ToLower() depends on the current culture
    // (e.g. "IOVA" lowers to "ıova" under tr-TR), which silently changed the branch taken.
    bool isOva = model.Equals("ova", StringComparison.OrdinalIgnoreCase)
                 || model.Equals("oova", StringComparison.OrdinalIgnoreCase);
    bool isIova = model.Equals("iova", StringComparison.OrdinalIgnoreCase);
    string singleColumnFlag = singleColumn ? "+" : "-";

    ITrainerExtended trainer;
    if (isOva)
    {
        // The "oova" settings carry no sc= option; the original passed an extra format
        // argument here that had no matching placeholder.
        if (th > 0)
        {
            trainer = env.CreateTrainer(string.Format("oova{{ p=ft{{t={0}}} }}", th));
        }
        else
        {
            trainer = env.CreateTrainer("oova{p=ft{t=1} }");
        }
    }
    else
    {
        if (th > 0)
        {
            trainer = env.CreateTrainer(string.Format("iova{{ p=ft{{t={0}}} sc={1} }}", th, singleColumnFlag));
        }
        else
        {
            trainer = env.CreateTrainer(string.Format("iova{{p=ft{{t=1}} sc={0} }}", singleColumnFlag));
        }
    }

    using (var ch = env.Start("Train"))
    {
        var pred = trainer.Train(env, ch, roles);

        // A loader on the test file is created as in the original; it is not used afterwards.
        loader = env.CreateLoader(loaderSettings, new MultiFileSource(testFile));

        // A ratio of 1 is used for "iova" on the U4-typed data files, 0.1 otherwise.
        float ratio = type.StartsWith("U4", StringComparison.Ordinal) && isIova ? 1f : 0.1f;
        TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, pred, roles, outData, outData2,
                                                    PredictionKind.MulticlassClassification, true, ratio: ratio);
    }
}