/// <summary>
/// Copies the training data (samples, labels, max dimension, sample count)
/// from <paramref name="prob"/> into this instance's fields.
/// </summary>
/// <param name="prob">The source training problem.</param>
public void GenTrainData(Problem prob)
{
    X = prob.X;
    Y = prob.Y;
    _n = prob.N;
    _maxDim = prob.MaxDim;
}
/// <summary>
/// Trains a multi-class model: with more than two labels a one-vs-all
/// binary classifier is trained per label; with exactly two labels a
/// single binary classifier is trained. Fewer than two labels cannot
/// be classified.
/// </summary>
/// <param name="prob">The training data.</param>
/// <param name="arg">The training argument.</param>
/// <returns>The trained model.</returns>
public static Model Train(Problem prob, TrainingArg arg)
{
    List<double> labels = GetLabels(prob);

    // Guard clause: classification needs at least two distinct labels.
    if (labels.Count < 2)
        throw new Exception(Messege.CouldNotClassify);

    Model model = new Model();

    if (labels.Count == 2)
    {
        // Plain binary problem: one classifier, first label positive.
        model.NumberOfClasses = 2;
        model.BinaryClassifiers = new BinaryClassifier[]
        {
            BinaryClassifier.Train(prob, arg, labels[0], labels[1])
        };
        return model;
    }

    // More than two labels: one-vs-all, one classifier per label.
    List<BinaryClassifier> classifiers = new List<BinaryClassifier>(labels.Count);
    foreach (double label in labels)
    {
        Console.WriteLine("{0} vs all:", label);
        classifiers.Add(BinaryClassifier.Train(prob, arg, label));
        Console.WriteLine("finish");
    }

    model.NumberOfClasses = classifiers.Count;
    model.BinaryClassifiers = classifiers.ToArray();
    return model;
}
/// <summary>
/// Collects the distinct label values of the problem, in order of first
/// appearance in <c>prob.Y</c>.
/// </summary>
/// <param name="prob">The training data.</param>
/// <returns>The distinct labels in first-seen order.</returns>
private static List<double> GetLabels(Problem prob)
{
    List<double> labels = new List<double>();
    // O(1) membership check instead of List.Contains (O(n) per lookup,
    // O(n^2) overall). HashSet uses the same default double equality as
    // List.Contains, so NaN labels are deduplicated identically.
    HashSet<double> seen = new HashSet<double>();
    for (int n = 0; n < prob.N; n++)
    {
        double y = prob.Y[n];
        if (seen.Add(y))
        {
            labels.Add(y);
        }
    }
    return labels;
}
/// <summary>
/// Trains posLabel vs negLabel. When negLabel is NaN, trains posLabel
/// against all other labels (one-vs-all).
/// </summary>
/// <param name="prob">The training data</param>
/// <param name="arg">The training argument</param>
/// <param name="posLabel">positive label</param>
/// <param name="negLabel">negative label (NaN for one-vs-all)</param>
/// <returns>BinaryClassifier</returns>
public static BinaryClassifier Train(Problem prob, TrainingArg arg, double posLabel, double negLabel = double.NaN)
{
    // Reduce the multi-class data to a +1/-1 binary problem.
    Problem binaryProb = CreatBinaryProblem(prob, posLabel, negLabel);

    // Instantiate the configured strong learner by name via reflection,
    // resolved within the StrongLeaner namespace.
    Assembly asm = Assembly.GetAssembly(typeof(StrongLeaner));
    string typeName = typeof(StrongLeaner).Namespace + "." + arg.StrongLearnerName;
    StrongLeaner learner = (StrongLeaner)asm.CreateInstance(typeName);

    learner.Train(binaryProb, arg.WeakLearnerName, arg.WeakLearnerArgs, arg.Iterations);
    return new BinaryClassifier(posLabel, negLabel, learner);
}
/// <summary>
/// Builds a binary sub-problem: samples labeled posLabel become +1,
/// samples labeled negLabel become -1. When negLabel is NaN, every
/// non-positive sample becomes -1 (one-vs-all). Other samples are dropped.
/// </summary>
/// <param name="prob">The source multi-class problem.</param>
/// <param name="posLabel">Label mapped to +1.</param>
/// <param name="negLabel">Label mapped to -1, or NaN for "all others".</param>
/// <returns>The binary problem (positives first, then negatives).</returns>
private static Problem CreatBinaryProblem(Problem prob, double posLabel, double negLabel)
{
    bool oneVsAll = double.IsNaN(negLabel);
    List<Node[]> positives = new List<Node[]>();
    List<Node[]> negatives = new List<Node[]>();

    for (int n = 0; n < prob.N; n++)
    {
        if (prob.Y[n] == posLabel)
            positives.Add(prob.X[n]);
        else if (oneVsAll || prob.Y[n] == negLabel)
            negatives.Add(prob.X[n]);
    }

    // Positives occupy the front of the arrays (label +1), negatives the back (label -1).
    int total = positives.Count + negatives.Count;
    double[] Y = new double[total];
    Node[][] X = new Node[total][];
    int i = 0;
    foreach (Node[] x in positives)
    {
        X[i] = x;
        Y[i] = 1;
        i++;
    }
    foreach (Node[] x in negatives)
    {
        X[i] = x;
        Y[i] = -1;
        i++;
    }

    return new Problem(total, Y, X, prob.MaxDim);
}
/// <summary>
/// Writes a problem to a file. This will overwrite any previous data in the file.
/// </summary>
/// <param name="filename">The file to write to</param>
/// <param name="problem">The problem to write</param>
public static void Write(string filename, Problem problem)
{
    // using guarantees the stream is disposed even if Write throws,
    // replacing the manual try/finally { Close() } pattern.
    using (FileStream output = File.Open(filename, FileMode.Create))
    {
        Write(output, problem);
    }
}
/// <summary>
/// Writes a problem to a stream in "label dim:value dim:value ..." line format.
/// The stream is left open; only the writer's buffer is flushed.
/// </summary>
/// <param name="stream">The stream to write the problem to.</param>
/// <param name="problem">The problem to write.</param>
public static void Write(Stream stream, Problem problem)
{
    TemporaryCulture.Start();
    try
    {
        // The StreamWriter is intentionally not disposed: disposing it
        // would close the caller-owned stream. Flush pushes the buffer out.
        StreamWriter output = new StreamWriter(stream);
        for (int i = 0; i < problem.N; i++)
        {
            output.Write(problem.Y[i]);
            for (int j = 0; j < problem.X[i].Length; j++)
                output.Write(" {0}:{1}", problem.X[i][j].Dim, problem.X[i][j].Value);
            output.WriteLine();
        }
        output.Flush();
    }
    finally
    {
        // Bug fix: previously an exception while writing skipped Stop(),
        // leaving the temporary culture active for the rest of the process.
        TemporaryCulture.Stop();
    }
}
/// <summary>
/// Trains this strong learner by AdaBoost-style boosting: repeatedly trains
/// a fresh weak learner on the weighted samples, computes its coefficient
/// Alpha from its weighted correct/incorrect mass, and reweights the samples
/// toward the ones it misclassified.
/// </summary>
/// <param name="prob">The binary training data (labels +1/-1).</param>
/// <param name="weakLearnerName">Type name of the weak learner, resolved in the WeakLearner namespace.</param>
/// <param name="weakLearnerArgs">Arguments forwarded to each weak learner instance.</param>
/// <param name="iter">Number of boosting rounds.</param>
public void Train(Problem prob, string weakLearnerName, string[] weakLearnerArgs, int iter)
{
    // Create the weak-learner prototype by reflection and build its train data.
    Assembly asm = Assembly.GetAssembly(typeof(WeakLearner));
    WeakLearner srcLearner = (WeakLearner)asm.CreateInstance(typeof(WeakLearner).Namespace + "." + weakLearnerName, true);
    srcLearner.InitLearningOptions(weakLearnerArgs);
    TrainData traindata = srcLearner.CreateTrainData(prob);

    // Smoothing value 1/N avoids numerical problems (log of zero) when a
    // learner is near-perfect.
    // "Improved boosting algorithms using confidence-rated predictions", ch. 4.2.
    double smoothingVal = 1.0 / traindata.N;

    // Initialize a uniform sample-weight distribution.
    // NOTE(review): weight has length traindata.N but is indexed up to prob.N
    // below — this assumes traindata.N == prob.N; confirm with CreateTrainData.
    double[] weight = new double[traindata.N];
    for (int t = 0; t < weight.Length; t++)
    {
        weight[t] = smoothingVal;
    }

    // Console progress display; remember the cursor so each round overwrites
    // the previous "iterations x/y" line.
    Console.WriteLine("\tStrongLearner:{0}", this.GetType().Name);
    Console.WriteLine("\tWeakLearner:{0}", weakLearnerName);
    int cursorX = Console.CursorLeft;
    int cursorY = Console.CursorTop;

    // Boosting rounds.
    _weakLearners = new SortedList<int, WeakLearner>(iter);
    for (int t = 0; t < iter; t++)
    {
        // Fresh weak learner per round, configured like the prototype.
        WeakLearner subLearner = (WeakLearner)asm.CreateInstance(srcLearner.GetType().FullName);
        subLearner.InitLearningOptions(weakLearnerArgs);

        // Train on the current weight distribution; Pm is the weighted error.
        double Pm = subLearner.Train(traindata, weight);
        if (Pm >= 0.5)
        {
            // No better than chance — boosting cannot make progress.
            throw new Exception(Messege.CouldNotClassify);
        }

        // Weighted incorrect (eps_min) and correct (eps_pls) mass of this
        // round's hypothesis over the full training set.
        double eps_min = 0.0, eps_pls = 0.0;
        double[] result = new double[prob.N];
        for (int n = 0; n < prob.N; n++)
        {
            result[n] = subLearner.Classify(prob.X[n]);
            if ((result[n] * prob.Y[n]) < 0) eps_min += weight[n];
            if ((result[n] * prob.Y[n]) > 0) eps_pls += weight[n];
        }

        // Smoothed learner coefficient: Alpha = 0.5 * ln(eps+ / eps-).
        double Alpha = 0.5 * Math.Log((eps_pls + smoothingVal) / (eps_min + smoothingVal));
        subLearner.Alpha = Alpha;

        // Reweight: increase misclassified samples, decrease correct ones,
        // then renormalize so the weights sum to 1.
        double Z = 0;
        for (int n = 0; n < prob.N; n++)
        {
            weight[n] = weight[n] * Math.Exp(-1 * prob.Y[n] * result[n] * Alpha);
            Z += weight[n];
        }
        for (int n = 0; n < prob.N; n++)
            weight[n] /= Z;
        // (Removed dead debug code that re-summed the normalized weights
        // into an unused local.)

        _weakLearners.Add(t, subLearner);

        Console.SetCursorPosition(cursorX, cursorY);
        Console.WriteLine("\titerations {0}/{1}", t + 1, iter);
    }
}