/**
 * @param target array that receives the predicted class for each instance (filled by this method)
 */
public static void crossValidation(Problem prob, Parameter param, int nr_fold, double[] target) {
    int i;
    int[] fold_start = new int[nr_fold + 1];
    int l = prob.l;
    int[] perm = new int[l];

    // random permutation of the instance indices
    for (i = 0; i < l; i++)
        perm[i] = i;
    for (i = 0; i < l; i++) {
        int j = i + random.Next(l - i);
        swap(perm, i, j);
    }
    for (i = 0; i <= nr_fold; i++)
        fold_start[i] = i * l / nr_fold;

    // for each fold: train on all other folds, predict the held-out fold
    for (i = 0; i < nr_fold; i++) {
        int begin = fold_start[i];
        int end = fold_start[i + 1];
        int j, k;
        Problem subprob = new Problem();

        subprob.bias = prob.bias;
        subprob.n = prob.n;
        subprob.l = l - (end - begin);
        subprob.x = new Feature[subprob.l][];
        subprob.y = new double[subprob.l];

        k = 0;
        for (j = 0; j < begin; j++) {
            subprob.x[k] = prob.x[perm[j]];
            subprob.y[k] = prob.y[perm[j]];
            ++k;
        }
        for (j = end; j < l; j++) {
            subprob.x[k] = prob.x[perm[j]];
            subprob.y[k] = prob.y[perm[j]];
            ++k;
        }

        Model submodel = train(subprob, param);
        for (j = begin; j < end; j++)
            target[perm[j]] = predict(submodel, prob.x[perm[j]]);
    }
}
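A hedged usage sketch, not part of the library source: computing cross-validation accuracy from the target array this method fills. The Problem prob and Parameter param are assumed to be built as elsewhere in this file; the accuracy computation is illustrative.

// Illustrative only: run 5-fold CV and count exact label matches.
double[] target = new double[prob.l];
Linear.crossValidation(prob, param, 5, target);
int correct = 0;
for (int i = 0; i < prob.l; i++)
    if (target[i] == prob.y[i]) correct++;
Console.WriteLine("Cross Validation Accuracy = {0}%", 100.0 * correct / prob.l);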
internal void parse_command_line(string[] argv) {
    int i;

    // eps: see setting below
    Parameter = new Parameter(SolverType.getById(SolverType.L2R_L2LOSS_SVC_DUAL), 1, Double.PositiveInfinity, 0.1);
    // default values
    Bias = -1;
    cross_validation = false;

    // parse options
    for (i = 0; i < argv.Length; i++) {
        if (argv[i][0] != '-') break;
        if (++i >= argv.Length) exit_with_help();

        switch (argv[i - 1][1]) {
            case 's':
                Parameter.solverType = SolverType.getById(Linear.atoi(argv[i]));
                break;
            case 'c':
                Parameter.setC(Linear.atof(argv[i]));
                break;
            case 'p':
                Parameter.setP(Linear.atof(argv[i]));
                break;
            case 'e':
                Parameter.setEps(Linear.atof(argv[i]));
                break;
            case 'B':
                Bias = Linear.atof(argv[i]);
                break;
            case 'w':
                // -wLABEL WEIGHT: the label is embedded in the flag itself
                int weightLabel = int.Parse(argv[i - 1].Substring(2));
                double weight = double.Parse(argv[i]);
                Parameter.weightLabel = addToArray(Parameter.weightLabel, weightLabel);
                Parameter.weight = addToArray(Parameter.weight, weight);
                break;
            case 'v':
                cross_validation = true;
                nr_fold = int.Parse(argv[i]);
                if (nr_fold < 2) {
                    Console.Error.WriteLine("n-fold cross validation: n must be >= 2");
                    exit_with_help();
                }
                break;
            case 'q':
                i--; // -q takes no argument; undo the lookahead
                Linear.disableDebugOutput();
                break;
            default:
                Console.Error.WriteLine("unknown option");
                exit_with_help();
                break;
        }
    }

    // determine filenames
    if (i >= argv.Length) exit_with_help();

    inputFilename = argv[i];

    if (i < argv.Length - 1)
        modelFilename = argv[i + 1];
    else {
        int p = argv[i].LastIndexOf('/');
        ++p; // whew...
        modelFilename = argv[i].Substring(p) + ".model";
    }

    if (Parameter.eps == Double.PositiveInfinity) {
        switch (Parameter.solverType.getId()) {
            case SolverType.L2R_LR:
            case SolverType.L2R_L2LOSS_SVC:
                Parameter.setEps(0.01);
                break;
            case SolverType.L2R_L2LOSS_SVR:
                Parameter.setEps(0.001);
                break;
            case SolverType.L2R_L2LOSS_SVC_DUAL:
            case SolverType.L2R_L1LOSS_SVC_DUAL:
            case SolverType.MCSVM_CS:
            case SolverType.L2R_LR_DUAL:
                Parameter.setEps(0.1);
                break;
            case SolverType.L1R_L2LOSS_SVC:
            case SolverType.L1R_LR:
                Parameter.setEps(0.01);
                break;
            case SolverType.L2R_L1LOSS_SVR_DUAL:
            case SolverType.L2R_L2LOSS_SVR_DUAL:
                Parameter.setEps(0.1);
                break;
            default:
                throw new InvalidOperationException("unknown solver type: " + Parameter.solverType);
        }
    }
}
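For orientation, a hedged sketch of a typical invocation. The enclosing class name Train is an assumption (it is whatever type declares parse_command_line); the argument vector is hypothetical.

// -s 3 selects solver id 3, -c 0.5 sets C, -w1 2.0 scales the penalty of
// class 1 by 2, -v 5 requests 5-fold CV. "heart_scale" is the input file;
// since no model filename follows it, the name defaults to
// "heart_scale.model" via the LastIndexOf('/') branch above.
var trainer = new Train(); // assumed enclosing class name
trainer.parse_command_line(new string[] { "-s", "3", "-c", "0.5", "-w1", "2.0", "-v", "5", "heart_scale" });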
/**
 * A coordinate descent algorithm for
 * L1-loss and L2-loss epsilon-SVR dual problem
 *
 *  min_\beta  0.5 \beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l |\beta_i| + \sum_{i=1}^l y_i \beta_i,
 *    s.t.     -upper_bound_i <= \beta_i <= upper_bound_i,
 *
 *  where Qij = xi^T xj and diag(lambda) is the diagonal matrix
 *  with lambda_i on the diagonal
 *
 * In L1-SVM case:
 *      upper_bound_i = C
 *      lambda_i = 0
 * In L2-SVM case:
 *      upper_bound_i = INF
 *      lambda_i = 1/(2*C)
 *
 * Given:
 * x, y, p, C
 * eps is the stopping tolerance
 *
 * solution will be put in w
 *
 * See Algorithm 4 of Ho and Lin, 2012
 */
private static void solve_l2r_l1l2_svr(Problem prob, double[] w, Parameter param) {
    int l = prob.l;
    double C = param.C;
    double p = param.p;
    int w_size = prob.n;
    double eps = param.eps;
    int i, s, iter = 0;
    int max_iter = 1000;
    int active_size = l;
    int[] index = new int[l];

    double d, G, H;
    double Gmax_old = Double.PositiveInfinity;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = 0; // initialize to 0 to silence the compiler
    double[] beta = new double[l];
    double[] QD = new double[l];
    double[] y = prob.y;

    // L2R_L2LOSS_SVR_DUAL
    double[] lambda = new double[] { 0.5 / C };
    double[] upper_bound = new double[] { Double.PositiveInfinity };

    if (param.solverType.getId() == SolverType.L2R_L1LOSS_SVR_DUAL) {
        lambda[0] = 0;
        upper_bound[0] = C;
    }

    // Initial beta can be set here. Note that
    // -upper_bound <= beta[i] <= upper_bound
    for (i = 0; i < l; i++)
        beta[i] = 0;

    for (i = 0; i < w_size; i++)
        w[i] = 0;
    for (i = 0; i < l; i++) {
        QD[i] = 0;
        foreach (Feature xi in prob.x[i]) {
            double val = xi.Value;
            QD[i] += val * val;
            w[xi.Index - 1] += beta[i] * val;
        }
        index[i] = i;
    }

    while (iter < max_iter) {
        Gmax_new = 0;
        Gnorm1_new = 0;

        for (i = 0; i < active_size; i++) {
            int j = i + random.Next(active_size - i);
            swap(index, i, j);
        }

        for (s = 0; s < active_size; s++) {
            i = index[s];
            G = -y[i] + lambda[GETI_SVR(i)] * beta[i];
            H = QD[i] + lambda[GETI_SVR(i)];

            foreach (Feature xi in prob.x[i]) {
                int ind = xi.Index - 1;
                double val = xi.Value;
                G += val * w[ind];
            }

            double Gp = G + p;
            double Gn = G - p;
            double violation = 0;
            if (beta[i] == 0) {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp > Gmax_old && Gn < -Gmax_old) {
                    active_size--;
                    swap(index, s, active_size);
                    s--;
                    continue;
                }
            } else if (beta[i] >= upper_bound[GETI_SVR(i)]) {
                if (Gp > 0)
                    violation = Gp;
                else if (Gp < -Gmax_old) {
                    active_size--;
                    swap(index, s, active_size);
                    s--;
                    continue;
                }
            } else if (beta[i] <= -upper_bound[GETI_SVR(i)]) {
                if (Gn < 0)
                    violation = -Gn;
                else if (Gn > Gmax_old) {
                    active_size--;
                    swap(index, s, active_size);
                    s--;
                    continue;
                }
            } else if (beta[i] > 0)
                violation = Math.Abs(Gp);
            else
                violation = Math.Abs(Gn);

            Gmax_new = Math.Max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            if (Gp < H * beta[i])
                d = -Gp / H;
            else if (Gn > H * beta[i])
                d = -Gn / H;
            else
                d = -beta[i];

            if (Math.Abs(d) < 1.0e-12) continue;

            double beta_old = beta[i];
            beta[i] = Math.Min(Math.Max(beta[i] + d, -upper_bound[GETI_SVR(i)]), upper_bound[GETI_SVR(i)]);
            d = beta[i] - beta_old;

            if (d != 0) {
                foreach (Feature xi in prob.x[i]) {
                    w[xi.Index - 1] += d * xi.Value;
                }
            }
        }

        if (iter == 0) Gnorm1_init = Gnorm1_new;
        iter++;
        if (iter % 10 == 0) info(".");

        if (Gnorm1_new <= eps * Gnorm1_init) {
            if (active_size == l)
                break;
            else {
                active_size = l;
                info("*");
                Gmax_old = Double.PositiveInfinity;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    info("\noptimization finished, #iter = {0}", iter);
    if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n");
    // calculate objective value
    double v = 0;
    int nSV = 0;
    for (i = 0; i < w_size; i++)
        v += w[i] * w[i];
    v = 0.5 * v;
    for (i = 0; i < l; i++) {
        v += p * Math.Abs(beta[i]) - y[i] * beta[i] + 0.5 * lambda[GETI_SVR(i)] * beta[i] * beta[i];
        if (beta[i] != 0) nSV++;
    }

    info("Objective value = {0}", v);
    info("nSV = {0}", nSV);
}
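The inner update in solve_l2r_l1l2_svr solves a one-variable piecewise-quadratic subproblem in closed form. The standalone sketch below (illustrative, not library code) isolates that step: minimize 0.5*H*d^2 + G*d + p*|b + d| over d, then project b + d onto the box [-U, U].

// Hedged illustration of the per-coordinate update used above:
// argmin_d 0.5*H*d^2 + G*d + p*|b + d|, followed by projection onto [-U, U].
static double SvrCoordinateStep(double b, double G, double H, double p, double U) {
    double Gp = G + p;                  // derivative on the region b + d > 0
    double Gn = G - p;                  // derivative on the region b + d < 0
    double d;
    if (Gp < H * b) d = -Gp / H;        // optimum lies where b + d > 0
    else if (Gn > H * b) d = -Gn / H;   // optimum lies where b + d < 0
    else d = -b;                        // optimum at the kink: b + d = 0
    return Math.Min(Math.Max(b + d, -U), U); // box projection
}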
private static void train_one(Problem prob, Parameter param, double[] w, double Cp, double Cn) {
    double eps = param.eps;

    int pos = 0;
    for (int i = 0; i < prob.l; i++)
        if (prob.y[i] > 0) {
            pos++;
        }
    int neg = prob.l - pos;
    double primal_solver_tol = eps * Math.Max(Math.Min(pos, neg), 1) / prob.l;

    IFunction fun_obj = null;
    switch (param.solverType.getId()) {
        case SolverType.L2R_LR: {
            double[] C = new double[prob.l];
            for (int i = 0; i < prob.l; i++) {
                if (prob.y[i] > 0)
                    C[i] = Cp;
                else
                    C[i] = Cn;
            }
            fun_obj = new L2R_LrFunction(prob, C);
            Tron tron_obj = new Tron(fun_obj, primal_solver_tol);
            tron_obj.tron(w);
            break;
        }
        case SolverType.L2R_L2LOSS_SVC: {
            double[] C = new double[prob.l];
            for (int i = 0; i < prob.l; i++) {
                if (prob.y[i] > 0)
                    C[i] = Cp;
                else
                    C[i] = Cn;
            }
            fun_obj = new L2R_L2_SvcFunction(prob, C);
            Tron tron_obj = new Tron(fun_obj, primal_solver_tol);
            tron_obj.tron(w);
            break;
        }
        case SolverType.L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.getById(SolverType.L2R_L2LOSS_SVC_DUAL));
            break;
        case SolverType.L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.getById(SolverType.L2R_L1LOSS_SVC_DUAL));
            break;
        case SolverType.L1R_L2LOSS_SVC: {
            Problem prob_col = transpose(prob);
            solve_l1r_l2_svc(prob_col, w, primal_solver_tol, Cp, Cn);
            break;
        }
        case SolverType.L1R_LR: {
            Problem prob_col = transpose(prob);
            solve_l1r_lr(prob_col, w, primal_solver_tol, Cp, Cn);
            break;
        }
        case SolverType.L2R_LR_DUAL:
            solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
            break;
        case SolverType.L2R_L2LOSS_SVR: {
            double[] C = new double[prob.l];
            for (int i = 0; i < prob.l; i++)
                C[i] = param.C;

            fun_obj = new L2R_L2_SvrFunction(prob, C, param.p);
            Tron tron_obj = new Tron(fun_obj, param.eps);
            tron_obj.tron(w);
            break;
        }
        case SolverType.L2R_L1LOSS_SVR_DUAL:
        case SolverType.L2R_L2LOSS_SVR_DUAL:
            solve_l2r_l1l2_svr(prob, w, param);
            break;
        default:
            throw new InvalidOperationException("unknown solver type: " + param.solverType);
    }
}
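The primal solvers above scale the stopping tolerance by the class balance of the training set. A quick worked check with illustrative values:

// Hedged example: with eps = 0.01 and l = 1000 instances, 100 of them
// positive, primal_solver_tol = 0.01 * max(min(100, 900), 1) / 1000 = 0.001,
// so imbalanced problems get a tighter primal tolerance than eps itself.
double eps = 0.01;
int l = 1000, pos = 100, neg = l - pos;
double tol = eps * Math.Max(Math.Min(pos, neg), 1) / l;
Console.WriteLine(tol); // prints 0.001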
/**
 * @throws ArgumentException if the feature nodes of prob are not sorted in ascending order
 */
public static Model train(Problem prob, Parameter param) {
    if (prob == null) throw new ArgumentNullException("problem must not be null");
    if (param == null) throw new ArgumentNullException("parameter must not be null");
    if (prob.n == 0) throw new ArgumentException("problem has zero features");
    if (prob.l == 0) throw new ArgumentException("problem has zero instances");

    foreach (Feature[] nodes in prob.x) {
        int indexBefore = 0;
        foreach (Feature n_ in nodes) {
            if (n_.Index <= indexBefore) {
                throw new ArgumentException("feature nodes must be sorted by index in ascending order");
            }
            indexBefore = n_.Index;
        }
    }

    int l = prob.l;
    int n = prob.n;
    int w_size = prob.n;
    Model model = new Model();

    if (prob.bias >= 0)
        model.nr_feature = n - 1;
    else
        model.nr_feature = n;
    model.solverType = param.solverType;
    model.bias = prob.bias;

    if (param.solverType.getId() == SolverType.L2R_L2LOSS_SVR //
        || param.solverType.getId() == SolverType.L2R_L1LOSS_SVR_DUAL //
        || param.solverType.getId() == SolverType.L2R_L2LOSS_SVR_DUAL) {
        model.w = new double[w_size];
        model.nr_class = 2;
        model.label = null;

        checkProblemSize(n, model.nr_class);

        train_one(prob, param, model.w, 0, 0);
    } else {
        int[] perm = new int[l];

        // group training data of the same class
        GroupClassesReturn rv = groupClasses(prob, perm);
        int nr_class = rv.nr_class;
        int[] label = rv.label;
        int[] start = rv.start;
        int[] count = rv.count;

        checkProblemSize(n, nr_class);

        model.nr_class = nr_class;
        model.label = new int[nr_class];
        for (int i = 0; i < nr_class; i++)
            model.label[i] = label[i];

        // calculate weighted C
        double[] weighted_C = new double[nr_class];
        for (int i = 0; i < nr_class; i++)
            weighted_C[i] = param.C;

        for (int i = 0; i < param.getNumWeights(); i++) {
            int j;
            for (j = 0; j < nr_class; j++)
                if (param.weightLabel[i] == label[j]) break;

            if (j == nr_class) throw new ArgumentException("class label " + param.weightLabel[i] + " specified in weight is not found");

            weighted_C[j] *= param.weight[i];
        }

        // constructing the subproblem
        Feature[][] x = new Feature[l][];
        for (int i = 0; i < l; i++)
            x[i] = prob.x[perm[i]];

        Problem sub_prob = new Problem();
        sub_prob.l = l;
        sub_prob.n = n;
        sub_prob.x = new Feature[sub_prob.l][];
        sub_prob.y = new double[sub_prob.l];

        for (int k = 0; k < sub_prob.l; k++)
            sub_prob.x[k] = x[k];

        // multi-class svm by Crammer and Singer
        if (param.solverType.getId() == SolverType.MCSVM_CS) {
            model.w = new double[n * nr_class];
            for (int i = 0; i < nr_class; i++) {
                for (int j = start[i]; j < start[i] + count[i]; j++) {
                    sub_prob.y[j] = i;
                }
            }

            SolverMCSVM_CS solver = new SolverMCSVM_CS(sub_prob, nr_class, weighted_C, param.eps);
            solver.solve(model.w);
        } else {
            if (nr_class == 2) {
                model.w = new double[w_size];

                int e0 = start[0] + count[0];
                int k = 0;
                for (; k < e0; k++)
                    sub_prob.y[k] = +1;
                for (; k < sub_prob.l; k++)
                    sub_prob.y[k] = -1;

                train_one(sub_prob, param, model.w, weighted_C[0], weighted_C[1]);
            } else {
                // one-vs-rest: train one weight vector per class
                model.w = new double[w_size * nr_class];
                double[] w = new double[w_size];
                for (int i = 0; i < nr_class; i++) {
                    int si = start[i];
                    int ei = si + count[i];

                    int k = 0;
                    for (; k < si; k++)
                        sub_prob.y[k] = -1;
                    for (; k < ei; k++)
                        sub_prob.y[k] = +1;
                    for (; k < sub_prob.l; k++)
                        sub_prob.y[k] = -1;

                    train_one(sub_prob, param, w, weighted_C[i], param.C);

                    for (int j = 0; j < n; j++)
                        model.w[j * nr_class + i] = w[j];
                }
            }
        }
    }
    return model;
}
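A minimal end-to-end sketch, hedged: it assumes a FeatureNode(index, value) type implementing Feature (mirroring the liblinear-java API this port follows) and the Parameter(solverType, C, eps, p) constructor used in parse_command_line above; the tiny two-instance problem is purely illustrative.

// Hedged usage sketch: build a toy problem by hand, train, predict.
Problem prob = new Problem();
prob.l = 2;                       // two training instances
prob.n = 2;                       // two features, no bias term
prob.bias = -1;
prob.y = new double[] { +1, -1 };
prob.x = new Feature[][] {
    new Feature[] { new FeatureNode(1, 1.0), new FeatureNode(2, 0.5) }, // FeatureNode is assumed
    new Feature[] { new FeatureNode(1, -1.0) },
};

Parameter param = new Parameter(SolverType.getById(SolverType.L2R_L2LOSS_SVC_DUAL), 1.0, 0.1, 0.1);
Model model = Linear.train(prob, param);
double label = Linear.predict(model, new Feature[] { new FeatureNode(1, 0.8) });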