// Constructor: keeps references to the problem and the cost array C, and allocates
// two work arrays (z and I) with one slot per training instance (prob.l).
public L2R_L2_SvcFunction(Problem prob, double[] C)
            // number of training instances
            int l = prob.l;

            this.prob = prob;

            // scratch buffers sized to the instance count; their use is outside this view
            z = new double[l];
            I = new int[l];
            // stored by reference, not copied
            this.C = C;
 // Main constructor: captures the problem, its dimensions (prob.n features,
 // prob.l instances), the class count, the cost array, the tolerance eps and the
 // iteration cap, and allocates two work arrays (B and G) with one slot per class.
 public SolverMCSVM_CS( Problem prob, int nr_class, double[] weighted_C, double eps, int max_iter ) {
     this.w_size = prob.n;
     this.l = prob.l;
     this.nr_class = nr_class;
     this.eps = eps;
     this.max_iter = max_iter;
     this.prob = prob;
     // stored by reference, not copied
     this.C = weighted_C;
     this.B = new double[nr_class];
     this.G = new double[nr_class];
    // Constructor: keeps references to the problem and the cost array C, and allocates
    // two work arrays (z and D) with one slot per training instance (prob.l).
    public L2R_LrFunction( Problem prob, double[] C ) {
        // number of training instances
        int l = prob.l;

        this.prob = prob;

        // scratch buffers sized to the instance count; their use is outside this view
        z = new double[l];
        D = new double[l];
        // stored by reference, not copied
        this.C = C;
         * @param target predicted classes
        // Cross-validation over nr_fold folds: the instance order is shuffled, a model is
        // trained on the instances outside each fold, and the prediction for every
        // held-out instance is written to target at that instance's original index.
        // target must have at least prob.l entries.
        public static void crossValidation(Problem prob, Parameter param, int nr_fold, double[] target)
            int i;
            // fold i covers permuted positions [fold_start[i], fold_start[i + 1])
            int[] fold_start = new int[nr_fold + 1];
            int l = prob.l;
            int[] perm = new int[l];

            // start from the identity permutation ...
            for (i = 0; i < l; i++)
                perm[i] = i;
            // ... then shuffle it: position i receives a random element from [i, l)
            for (i = 0; i < l; i++)
                int j = i + random.Next(l - i);
                swap(perm, i, j);
            // fold boundaries by integer division, so fold sizes differ by at most one
            for (i = 0; i <= nr_fold; i++)
                fold_start[i] = i * l / nr_fold;

            for (i = 0; i < nr_fold; i++)
                int begin = fold_start[i];
                int end = fold_start[i + 1];
                int j, k;
                Problem subprob = new Problem();

                // the training sub-problem holds every instance except those in [begin, end)
                subprob.bias = prob.bias;
                subprob.n = prob.n;
                subprob.l = l - (end - begin);
                subprob.x = new Feature[subprob.l][];
                subprob.y = new double[subprob.l];

                k = 0;
                // instances located before the held-out fold
                for (j = 0; j < begin; j++)
                    subprob.x[k] = prob.x[perm[j]];
                    subprob.y[k] = prob.y[perm[j]];
                // instances located after the held-out fold
                for (j = end; j < l; j++)
                    subprob.x[k] = prob.x[perm[j]];
                    subprob.y[k] = prob.y[perm[j]];
                Model submodel = train(subprob, param);
                // predict the held-out instances and store results under their original indices
                for (j = begin; j < end; j++)
                    target[perm[j]] = predict(submodel, prob.x[perm[j]]);
        // Validates the training parameters. Returns a short error message for the first
        // check that fails, or null when all checks pass. The prob argument is not read
        // by any of the visible checks.
        public static string check_parameter(Problem prob, Parameters param)
            if (param.Epsilon <= 0)
                return "eps <= 0";

            if (param.Complexity <= 0)
                return "C <= 0";

            // NOTE(review): this branch cannot be taken — any Epsilon < 0 has already been
            // rejected by the "eps <= 0" check above. The "p < 0" message suggests a
            // different property was meant to be tested here; confirm against Parameters.
            if (param.Epsilon < 0)
                return "p < 0";

            if (!Enum.IsDefined(typeof(LibSvmSolverType), param.Solver))
                return "unknown solver type";

            // requesting cross-validation is reported as an error
            if (param.CrossValidation)
                return "cross-validation is not supported at this time.";

            return null;
 // Convenience constructor: delegates to the five-argument constructor with max_iter = 100000.
 public SolverMCSVM_CS( Problem prob, int nr_class, double[] C, double eps ) : this(prob, nr_class, C, eps, 100000) {
 // Convenience constructor: delegates to the four-argument constructor with eps = 0.1.
 public SolverMCSVM_CS( Problem prob, int nr_class, double[] C ) : this(prob, nr_class, C, 0.1) {
        // Groups training instances by class label.
        // Scans prob.y (each label truncated to int), collects the distinct labels in order
        // of first appearance with their counts, and fills perm with instance indices
        // arranged by class. Returns the class count plus the label, start and count arrays.
        // perm must have at least prob.l entries.
        private static GroupClassesReturn groupClasses(Problem prob, int[] perm)
            int l = prob.l;
            // initial capacity of the label/count arrays; doubled on demand below
            int max_nr_class = 16;
            int nr_class = 0;

            int[] label = new int[max_nr_class];
            int[] count = new int[max_nr_class];
            // data_label[i] = class slot assigned to instance i
            int[] data_label = new int[l];
            int i;

            for (i = 0; i < l; i++)
                int this_label = (int)prob.y[i];
                int j;
                // linear search for this label among the classes seen so far
                for (j = 0; j < nr_class; j++)
                    if (this_label == label[j])
                data_label[i] = j;
                // j == nr_class means the label was not found: register a new class
                if (j == nr_class)
                    // grow both arrays when the current capacity is exhausted
                    if (nr_class == max_nr_class)
                        max_nr_class *= 2;
                        label = copyOf(label, max_nr_class);
                        count = copyOf(count, max_nr_class);
                    label[nr_class] = this_label;
                    count[nr_class] = 1;

            // start[c] = offset of class c's first instance in the grouped ordering
            // (prefix sum of count)
            int[] start = new int[nr_class];
            start[0] = 0;
            for (i = 1; i < nr_class; i++)
                start[i] = start[i - 1] + count[i - 1];
            // place every instance index into its class's region of perm
            for (i = 0; i < l; i++)
                perm[start[data_label[i]]] = i;
            // rebuild the start offsets after the placement pass
            start[0] = 0;
            for (i = 1; i < nr_class; i++)
                start[i] = start[i - 1] + count[i - 1];

            return new GroupClassesReturn(nr_class, label, start, count);
        // Assembles a Problem from parsed labels (vy) and feature rows (vx).
        // prob.l is the number of labels, prob.n starts from max_index, and when bias >= 0
        // the last slot of every row is filled with a bias feature at index max_index + 1.
        private static Problem constructProblem(List<Double> vy, List<Feature[]> vx, int max_index, double bias) {
            Problem prob = new Problem();
            prob.bias = bias;
            prob.l = vy.Count;
            prob.n = max_index;
            if (bias >= 0) {
            prob.x = new Feature[prob.l][];
            for (int i = 0; i < prob.l; i++) {
                // rows are shared with vx, not copied
                prob.x[i] = vx[i];

                if (bias >= 0) {
                    // the caller is expected to have left the last slot of each row empty
                    Debug.Assert(prob.x[i][prob.x[i].Length - 1] == null);
                    prob.x[i][prob.x[i].Length - 1] = new Feature(max_index + 1, bias);

            prob.y = new double[prob.l];
            for (int i = 0; i < prob.l; i++)
                prob.y[i] = vy[i];

            return prob;
         * A coordinate descent algorithm for
         * L1-loss and L2-loss SVM dual problems
         *  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
         *    s.t.      0 <= \alpha_i <= upper_bound_i,
         *  where Qij = yi yj xi^T xj and
         *  D is a diagonal matrix
         * In L1-SVM case:
         *     upper_bound_i = Cp if y_i = 1
         *      upper_bound_i = Cn if y_i = -1
         *      D_ii = 0
         * In L2-SVM case:
         *      upper_bound_i = INF
         *      D_ii = 1/(2*Cp) if y_i = 1
         *      D_ii = 1/(2*Cn) if y_i = -1
         * Given:
         * x, y, Cp, Cn
         * eps is the stopping tolerance
         * solution will be put in w
         * See Algorithm 3 of Hsieh et al., ICML 2008
        // Dual coordinate descent for L2-regularized L1-loss / L2-loss SVC.
        // prob: training data; w: output weight vector (zeroed here, at least prob.n long);
        // eps: stopping tolerance on the projected-gradient gap; Cp / Cn: cost for
        // positive / negative instances; solver_type: selects the L1-loss variant when its
        // id is L2R_L1LOSS_SVC_DUAL, otherwise the L2-loss settings are used.
        private static void solve_l2r_l1l2_svc(Problem prob, double[] w, double eps, double Cp, double Cn, SolverType solver_type)
            int l = prob.l;
            int w_size = prob.n;
            int i, s, iter = 0;
            double C, d, G;
            // QD[i]: diagonal term for instance i (diag entry plus squared norm of x_i)
            double[] QD = new double[l];
            int max_iter = 1000;
            // index: working order of instances; the first active_size entries are active
            int[] index = new int[l];
            double[] alpha = new double[l];
            // y: labels reduced to +1 / -1
            sbyte[] y = new sbyte[l];
            int active_size = l;

            // PG: projected gradient, for shrinking and stopping
            double PG;
            double PGmax_old = Double.PositiveInfinity;
            double PGmin_old = Double.NegativeInfinity;
            double PGmax_new, PGmin_new;

            // default solver_type: L2R_L2LOSS_SVC_DUAL
            // Both tables have three slots and are indexed through GETI(y, i); slot 0 holds
            // the Cn-based value, slot 2 the Cp-based value, slot 1 is unused filler.
            double[] diag = new[] { 0.5 / Cn, 0, 0.5 / Cp };
            double[] upper_bound = new double[] { Double.PositiveInfinity, 0, Double.PositiveInfinity };
            // L1-loss variant: no diagonal term, alpha is box-constrained by Cn / Cp
            if (solver_type.getId() == SolverType.L2R_L1LOSS_SVC_DUAL)
                diag[0] = 0;
                diag[2] = 0;
                upper_bound[0] = Cn;
                upper_bound[2] = Cp;

            // map labels to +1 (prob.y[i] > 0) or -1
            for (i = 0; i < l; i++)
                if (prob.y[i] > 0)
                    y[i] = +1;
                    y[i] = -1;

            // Initial alpha can be set here. Note that
            // 0 <= alpha[i] <= upper_bound[GETI(i)]
            for (i = 0; i < l; i++)
                alpha[i] = 0;

            for (i = 0; i < w_size; i++)
                w[i] = 0;
            // precompute QD and accumulate w = sum_i y_i * alpha_i * x_i
            // (feature indices are 1-based, hence Index - 1)
            for (i = 0; i < l; i++)
                QD[i] = diag[GETI(y, i)];

                foreach (Feature xi in prob.x[i])
                    double val = xi.Value;
                    QD[i] += val * val;
                    w[xi.Index - 1] += y[i] * alpha[i] * val;
                index[i] = i;

            // outer loop: one pass over the active set per iteration
            while (iter < max_iter)
                PGmax_new = Double.NegativeInfinity;
                PGmin_new = Double.PositiveInfinity;

                // shuffle the active part of index: position i gets a random element from [i, active_size)
                for (i = 0; i < active_size; i++)
                    int j = i + random.Next(active_size - i);
                    swap(index, i, j);

                for (s = 0; s < active_size; s++)
                    i = index[s];
                    G = 0;
                    sbyte yi = y[i];

                    // G = y_i * (w . x_i) - 1, plus the diagonal contribution below
                    foreach (Feature xi in prob.x[i])
                        G += w[xi.Index - 1] * xi.Value;
                    G = G * yi - 1;

                    C = upper_bound[GETI(y, i)];
                    G += alpha[i] * diag[GETI(y, i)];

                    // project the gradient onto the box [0, C]; at a bound, a gradient
                    // beyond the previous iteration's extreme triggers the shrinking swap
                    PG = 0;
                    if (alpha[i] == 0)
                        if (G > PGmax_old)
                            swap(index, s, active_size);
                        else if (G < 0)
                            PG = G;
                    else if (alpha[i] == C)
                        if (G < PGmin_old)
                            swap(index, s, active_size);
                        else if (G > 0)
                            PG = G;
                        PG = G;

                    PGmax_new = Math.Max(PGmax_new, PG);
                    PGmin_new = Math.Min(PGmin_new, PG);

                    // update alpha_i only when the projected gradient is not numerically zero
                    if (Math.Abs(PG) > 1.0e-12)
                        double alpha_old = alpha[i];
                        // Newton step on the single variable, clipped to [0, C]
                        alpha[i] = Math.Min(Math.Max(alpha[i] - G / QD[i], 0.0), C);
                        d = (alpha[i] - alpha_old) * yi;

                        // keep w consistent with the changed alpha_i
                        foreach (Feature xi in prob.x[i])
                            w[xi.Index - 1] += d * xi.Value;

                // progress marker every 10th iteration
                if (iter % 10 == 0) info(".");

                // stopping test on the projected-gradient gap; the shrinking bounds are
                // reset to infinity when the active set is restored to all l instances
                if (PGmax_new - PGmin_new <= eps)
                    if (active_size == l)
                        active_size = l;
                        PGmax_old = Double.PositiveInfinity;
                        PGmin_old = Double.NegativeInfinity;
                // carry this pass's extremes forward as next pass's shrinking thresholds;
                // a non-positive max / non-negative min disables the corresponding threshold
                PGmax_old = PGmax_new;
                PGmin_old = PGmin_new;
                if (PGmax_old <= 0) PGmax_old = Double.PositiveInfinity;
                if (PGmin_old >= 0) PGmin_old = Double.NegativeInfinity;

            info("\noptimization finished, #iter = {0}", iter);
            if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n");

            // calculate objective value

            // v accumulates ||w||^2 + sum_i alpha_i * (alpha_i * D_ii - 2); v / 2 is reported
            double v = 0;
            // nSV counts instances with alpha_i > 0
            int nSV = 0;
            for (i = 0; i < w_size; i++)
                v += w[i] * w[i];
            for (i = 0; i < l; i++)
                v += alpha[i] * (alpha[i] * diag[GETI(y, i)] - 2);
                if (alpha[i] > 0) ++nSV;
            info("Objective value = {0}", v / 2);
            info("nSV = {0}", nSV);
        // Trains a single binary (or regression) sub-problem with the solver selected by
        // param.solverType, writing the solution into w. Cp and Cn are the costs applied
        // to instances with prob.y[i] > 0 and to the remaining instances respectively.
        // Throws InvalidOperationException for an unrecognised solver type.
        private static void train_one(Problem prob, Parameter param, double[] w, double Cp, double Cn)
            double eps = param.eps;
            // pos / neg: number of instances with positive / non-positive label
            int pos = 0;
            for (int i = 0; i < prob.l; i++)
                if (prob.y[i] > 0)
            int neg = prob.l - pos;

            // tolerance for the primal solvers: eps scaled by the share of the smaller
            // class (at least one instance) in the whole problem
            double primal_solver_tol = eps * Math.Max(Math.Min(pos, neg), 1) / prob.l;

            IFunction fun_obj = null;
            switch (param.solverType.getId())
                case SolverType.L2R_LR:
                        // per-instance cost: Cp for positive labels, Cn otherwise
                        double[] C = new double[prob.l];
                        for (int i = 0; i < prob.l; i++)
                            if (prob.y[i] > 0)
                                C[i] = Cp;
                                C[i] = Cn;
                        fun_obj = new L2R_LrFunction(prob, C);
                        Tron tron_obj = new Tron(fun_obj, primal_solver_tol);
                case SolverType.L2R_L2LOSS_SVC:
                        // per-instance cost: Cp for positive labels, Cn otherwise
                        double[] C = new double[prob.l];
                        for (int i = 0; i < prob.l; i++)
                            if (prob.y[i] > 0)
                                C[i] = Cp;
                                C[i] = Cn;
                        fun_obj = new L2R_L2_SvcFunction(prob, C);
                        Tron tron_obj = new Tron(fun_obj, primal_solver_tol);
                // the two dual SVC variants share one solver and differ only in the type passed
                case SolverType.L2R_L2LOSS_SVC_DUAL:
                    solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.getById(SolverType.L2R_L2LOSS_SVC_DUAL));
                case SolverType.L2R_L1LOSS_SVC_DUAL:
                    solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.getById(SolverType.L2R_L1LOSS_SVC_DUAL));
                // the L1-regularized solvers work on the column-format (transposed) problem
                case SolverType.L1R_L2LOSS_SVC:
                        Problem prob_col = transpose(prob);
                        solve_l1r_l2_svc(prob_col, w, primal_solver_tol, Cp, Cn);
                case SolverType.L1R_LR:
                        Problem prob_col = transpose(prob);
                        solve_l1r_lr(prob_col, w, primal_solver_tol, Cp, Cn);
                case SolverType.L2R_LR_DUAL:
                    solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
                case SolverType.L2R_L2LOSS_SVR:
                        // regression: every instance gets the same cost param.C, and the
                        // unscaled param.eps is used as tolerance
                        double[] C = new double[prob.l];
                        for (int i = 0; i < prob.l; i++)
                            C[i] = param.C;

                        fun_obj = new L2R_L2_SvrFunction(prob, C, param.p);
                        Tron tron_obj = new Tron(fun_obj, param.eps);
                case SolverType.L2R_L1LOSS_SVR_DUAL:
                case SolverType.L2R_L2LOSS_SVR_DUAL:
                    solve_l2r_l1l2_svr(prob, w, param);

                    throw new InvalidOperationException("unknown solver type: " + param.solverType);
         * @throws ArgumentException if the feature nodes of prob are not sorted in ascending order
        // Trains a model for prob with the solver and settings in param.
        // Regression solver types train a single weight vector; MCSVM_CS trains one joint
        // multi-class model; every other type trains one binary model (two classes) or
        // one-vs-rest models (more than two classes).
        // Throws ArgumentNullException when prob or param is null or when the problem has
        // zero features / instances, and ArgumentException when a feature row is not
        // strictly ascending by index or a weight label matches no class.
        public static Model train(Problem prob, Parameter param)
            // ArgumentNullException's (paramName, message) overload is used so that the text
            // ends up in Message; the single-string overload would treat it as the
            // parameter name.
            if (prob == null) throw new ArgumentNullException("prob", "problem must not be null");
            if (param == null) throw new ArgumentNullException("param", "parameter must not be null");

            // The exception type is kept for existing callers even though these are not
            // null arguments.
            if (prob.n == 0) throw new ArgumentNullException("prob", "problem has zero features");
            if (prob.l == 0) throw new ArgumentNullException("prob", "problem has zero instances");

            // every row must have strictly increasing, positive feature indices
            foreach (Feature[] nodes in prob.x)
                int indexBefore = 0;
                foreach (Feature n_ in nodes)
                    if (n_.Index <= indexBefore)
                        throw new ArgumentException("feature nodes must be sorted by index in ascending order");
                    indexBefore = n_.Index;

            int l = prob.l;
            int n = prob.n;
            int w_size = prob.n;
            Model model = new Model();

            // with a bias term (bias >= 0) one of the n columns is the bias feature
            if (prob.bias >= 0)
                model.nr_feature = n - 1;
                model.nr_feature = n;

            model.solverType = param.solverType;
            model.bias = prob.bias;

            // regression solvers: a single weight vector, no class labels
            if (param.solverType.getId() == SolverType.L2R_L2LOSS_SVR || //
                param.solverType.getId() == SolverType.L2R_L1LOSS_SVR_DUAL || //
                param.solverType.getId() == SolverType.L2R_L2LOSS_SVR_DUAL)
                model.w = new double[w_size];
                model.nr_class = 2;
                model.label = null;

                checkProblemSize(n, model.nr_class);

                train_one(prob, param, model.w, 0, 0);
                int[] perm = new int[l];

                // group training data of the same class
                GroupClassesReturn rv = groupClasses(prob, perm);
                int nr_class = rv.nr_class;
                int[] label = rv.label;
                int[] start = rv.start;
                int[] count = rv.count;

                checkProblemSize(n, nr_class);

                model.nr_class = nr_class;
                model.label = new int[nr_class];
                for (int i = 0; i < nr_class; i++)
                    model.label[i] = label[i];

                // calculate weighted C
                // each class starts from param.C and is multiplied by its user weight, if any
                double[] weighted_C = new double[nr_class];
                for (int i = 0; i < nr_class; i++)
                    weighted_C[i] = param.C;
                for (int i = 0; i < param.getNumWeights(); i++)
                    int j;
                    for (j = 0; j < nr_class; j++)
                        if (param.weightLabel[i] == label[j]) break;

                    if (j == nr_class) throw new ArgumentException("class label " + param.weightLabel[i] + " specified in weight is not found");
                    weighted_C[j] *= param.weight[i];

                // constructing the subproblem
                // x holds the feature rows reordered by perm (grouped by class)
                Feature[][] x = new Feature[l][];
                for (int i = 0; i < l; i++)
                    x[i] = prob.x[perm[i]];

                Problem sub_prob = new Problem();
                sub_prob.l = l;
                sub_prob.n = n;
                sub_prob.x = new Feature[sub_prob.l][];
                sub_prob.y = new double[sub_prob.l];

                for (int k = 0; k < sub_prob.l; k++)
                    sub_prob.x[k] = x[k];

                // multi-class svm by Crammer and Singer
                if (param.solverType.getId() == SolverType.MCSVM_CS)
                    model.w = new double[n * nr_class];
                    // labels become class slot numbers 0..nr_class-1
                    for (int i = 0; i < nr_class; i++)
                        for (int j = start[i]; j < start[i] + count[i]; j++)
                            sub_prob.y[j] = i;

                    SolverMCSVM_CS solver = new SolverMCSVM_CS(sub_prob, nr_class, weighted_C, param.eps);
                    // two classes: one binary model, first class is +1, the other -1
                    if (nr_class == 2)
                        model.w = new double[w_size];

                        int e0 = start[0] + count[0];
                        int k = 0;
                        for (; k < e0; k++)
                            sub_prob.y[k] = +1;
                        for (; k < sub_prob.l; k++)
                            sub_prob.y[k] = -1;

                        train_one(sub_prob, param, model.w, weighted_C[0], weighted_C[1]);
                        // more than two classes: one-vs-rest, one weight vector per class,
                        // stored interleaved (feature-major) in model.w
                        model.w = new double[w_size * nr_class];
                        double[] w = new double[w_size];
                        for (int i = 0; i < nr_class; i++)
                            int si = start[i];
                            int ei = si + count[i];

                            // class i's contiguous range [si, ei) is +1, everything else -1
                            int k = 0;
                            for (; k < si; k++)
                                sub_prob.y[k] = -1;
                            for (; k < ei; k++)
                                sub_prob.y[k] = +1;
                            for (; k < sub_prob.l; k++)
                                sub_prob.y[k] = -1;

                            train_one(sub_prob, param, w, weighted_C[i], param.C);

                            for (int j = 0; j < n; j++)
                                model.w[j * nr_class + i] = w[j];
            return model;
        // transpose matrix X from row format to column format
        // Builds a column-format copy of prob: the result has one Feature array per
        // feature column (n of them), where each entry's Index is the 1-based row number
        // and its Value is the original feature value. Labels are copied unchanged.
        internal static Problem transpose(Problem prob)
            int l = prob.l;
            int n = prob.n;
            // col_ptr[i + 1] is read below as the number of entries in column i
            int[] col_ptr = new int[n + 1];
            Problem prob_col = new Problem();
            prob_col.l = l;
            prob_col.n = n;
            prob_col.y = new double[l];
            prob_col.x = new Feature[n][];

            for (int i = 0; i < l; i++)
                prob_col.y[i] = prob.y[i];

            // counting pass over all features of all rows
            for (int i = 0; i < l; i++)
                foreach (Feature x in prob.x[i])

            // allocate each column with its counted size
            for (int i = 0; i < n; i++)
                prob_col.x[i] = new Feature[col_ptr[i + 1]];
                col_ptr[i] = 0; // reuse the array to count the nr of elements

            // fill pass: row i's feature at 1-based index k goes into column k - 1,
            // tagged with the 1-based row number i + 1
            for (int i = 0; i < l; i++)
                for (int j = 0; j < prob.x[i].Length; j++)
                    Feature x = prob.x[i][j];
                    int index = x.Index - 1;
                    prob_col.x[index][col_ptr[index]] = new Feature(i + 1, x.Value);

            return prob_col;
         * A coordinate descent algorithm for
         * L1-regularized logistic regression problems
         *  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
         * Given:
         * x, y, Cp, Cn
         * eps is the stopping tolerance
         * solution will be put in w
         * See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
         * @since 1.5
        // Coordinate-descent Newton solver for L1-regularized logistic regression.
        // prob_col: training data in column format (see transpose); w: output weight
        // vector (zeroed here, at least prob_col.n long); eps: stopping tolerance relative
        // to the initial violation norm; Cp / Cn: cost for positive / negative instances.
        private static void solve_l1r_lr(Problem prob_col, double[] w, double eps, double Cp, double Cn)
            int l = prob_col.l;
            int w_size = prob_col.n;
            int j, s, newton_iter = 0, iter = 0;
            int max_newton_iter = 100;
            int max_iter = 1000;
            int max_num_linesearch = 20;
            int active_size;
            int QP_active_size;

            // nu: small constant added to the Hessian diagonal
            double nu = 1e-12;
            // inner_eps: tolerance of the inner QP loop, tightened when it stops after one cycle
            double inner_eps = 1;
            // sigma: sufficient-decrease constant of the line search
            double sigma = 0.01;
            double w_norm, w_norm_new;
            double z, G, H;
            double Gnorm1_init = 0; // eclipse moans this variable might not be initialized
            double Gmax_old = Double.PositiveInfinity;
            double Gmax_new, Gnorm1_new;
            double QP_Gmax_old = Double.PositiveInfinity;
            double QP_Gmax_new, QP_Gnorm1_new;
            double delta, negsum_xTd, cond;

            // index: working order of features; wpd: candidate point w + d of the inner QP;
            // xTd[i]: x_i . d; exp_wTx[i]: exp(w . x_i); tau / D: per-instance first- and
            // second-order loss terms derived from exp_wTx
            int[] index = new int[w_size];
            sbyte[] y = new sbyte[l];
            double[] Hdiag = new double[w_size];
            double[] Grad = new double[w_size];
            double[] wpd = new double[w_size];
            double[] xjneg_sum = new double[w_size];
            double[] xTd = new double[l];
            double[] exp_wTx = new double[l];
            double[] exp_wTx_new = new double[l];
            double[] tau = new double[l];
            double[] D = new double[l];

            // cost table indexed through GETI(y, i): slot 0 = Cn, slot 2 = Cp, slot 1 unused
            double[] C = { Cn, 0, Cp };

            // Initial w can be set here.
            for (j = 0; j < w_size; j++)
                w[j] = 0;

            // map labels to +1 (prob_col.y[j] > 0) or -1
            for (j = 0; j < l; j++)
                if (prob_col.y[j] > 0)
                    y[j] = 1;
                    y[j] = -1;

                exp_wTx[j] = 0;

            // accumulate w . x_i per instance and, per feature, the cost-weighted sum of
            // its values over negative instances
            w_norm = 0;
            for (j = 0; j < w_size; j++)
                w_norm += Math.Abs(w[j]);
                wpd[j] = w[j];
                index[j] = j;
                xjneg_sum[j] = 0;
                foreach (Feature x in prob_col.x[j])
                    int ind = x.Index - 1;
                    double val = x.Value;
                    exp_wTx[ind] += w[j] * val;
                    if (y[ind] == -1)
                        xjneg_sum[j] += C[GETI(y, ind)] * val;
            for (j = 0; j < l; j++)
                exp_wTx[j] = Math.Exp(exp_wTx[j]);
                double tau_tmp = 1 / (1 + exp_wTx[j]);
                tau[j] = C[GETI(y, j)] * tau_tmp;
                D[j] = C[GETI(y, j)] * exp_wTx[j] * tau_tmp * tau_tmp;

            // outer Newton iterations
            while (newton_iter < max_newton_iter)
                Gmax_new = 0;
                Gnorm1_new = 0;
                active_size = w_size;

                // gradient, Hessian diagonal and optimality violation for every active feature
                for (s = 0; s < active_size; s++)
                    j = index[s];
                    Hdiag[j] = nu;
                    Grad[j] = 0;

                    double tmp = 0;
                    foreach (Feature x in prob_col.x[j])
                        int ind = x.Index - 1;
                        Hdiag[j] += x.Value * x.Value * D[ind];
                        tmp += x.Value * tau[ind];
                    Grad[j] = -tmp + xjneg_sum[j];

                    // Gp / Gn: one-sided derivatives including the L1 term (+1 / -1)
                    double Gp = Grad[j] + 1;
                    double Gn = Grad[j] - 1;
                    double violation = 0;
                    if (w[j] == 0)
                        if (Gp < 0)
                            violation = -Gp;
                        else if (Gn > 0)
                            violation = Gn;
                        //outer-level shrinking
                        else if (Gp > Gmax_old / l && Gn < -Gmax_old / l)
                            swap(index, s, active_size);
                    else if (w[j] > 0)
                        violation = Math.Abs(Gp);
                        violation = Math.Abs(Gn);

                    Gmax_new = Math.Max(Gmax_new, violation);
                    Gnorm1_new += violation;

                if (newton_iter == 0) Gnorm1_init = Gnorm1_new;

                // converged: violation norm small relative to the initial one
                if (Gnorm1_new <= eps * Gnorm1_init) break;

                iter = 0;
                QP_Gmax_old = Double.PositiveInfinity;
                QP_active_size = active_size;

                for (int i = 0; i < l; i++)
                    xTd[i] = 0;

                // optimize QP over wpd
                while (iter < max_iter)
                    QP_Gmax_new = 0;
                    QP_Gnorm1_new = 0;

                    // Shuffle the active part of index: position j receives a random element
                    // from [j, QP_active_size). The "j +" offset matches the other shuffles
                    // in this file; without it the draw comes from [0, QP_active_size - j)
                    // and the resulting permutation is biased.
                    for (j = 0; j < QP_active_size; j++)
                        int i = j + random.Next(QP_active_size - j);
                        swap(index, i, j);

                    for (s = 0; s < QP_active_size; s++)
                        j = index[s];
                        H = Hdiag[j];

                        // gradient of the quadratic model at wpd for coordinate j
                        G = Grad[j] + (wpd[j] - w[j]) * nu;
                        foreach (Feature x in prob_col.x[j])
                            int ind = x.Index - 1;
                            G += x.Value * D[ind] * xTd[ind];

                        double Gp = G + 1;
                        double Gn = G - 1;
                        double violation = 0;
                        if (wpd[j] == 0)
                            if (Gp < 0)
                                violation = -Gp;
                            else if (Gn > 0)
                                violation = Gn;
                            //inner-level shrinking
                            else if (Gp > QP_Gmax_old / l && Gn < -QP_Gmax_old / l)
                                swap(index, s, QP_active_size);
                        else if (wpd[j] > 0)
                            violation = Math.Abs(Gp);
                            violation = Math.Abs(Gn);

                        QP_Gmax_new = Math.Max(QP_Gmax_new, violation);
                        QP_Gnorm1_new += violation;

                        // obtain solution of one-variable problem
                        if (Gp < H * wpd[j])
                            z = -Gp / H;
                        else if (Gn > H * wpd[j])
                            z = -Gn / H;
                            z = -wpd[j];

                        // skip negligible steps; clamp the step to [-10, 10]
                        if (Math.Abs(z) < 1.0e-12) continue;
                        z = Math.Min(Math.Max(z, -10.0), 10.0);

                        wpd[j] += z;

                        // keep x_i . d consistent with the changed coordinate
                        foreach (Feature x in prob_col.x[j])
                            int ind = x.Index - 1;
                            xTd[ind] += x.Value * z;


                    if (QP_Gnorm1_new <= inner_eps * Gnorm1_init)
                        //inner stopping
                        if (QP_active_size == active_size)
                        //active set reactivation
                            QP_active_size = active_size;
                            QP_Gmax_old = Double.PositiveInfinity;

                    QP_Gmax_old = QP_Gmax_new;

                if (iter >= max_iter) info("WARNING: reaching max number of inner iterations");

                // delta: predicted change used in the sufficient-decrease condition
                delta = 0;
                w_norm_new = 0;
                for (j = 0; j < w_size; j++)
                    delta += Grad[j] * (wpd[j] - w[j]);
                    if (wpd[j] != 0) w_norm_new += Math.Abs(wpd[j]);
                delta += (w_norm_new - w_norm);

                negsum_xTd = 0;
                for (int i = 0; i < l; i++)
                    if (y[i] == -1) negsum_xTd += C[GETI(y, i)] * xTd[i];

                // backtracking line search between w and wpd: accept when cond <= 0,
                // otherwise halve the step (wpd, delta, negsum_xTd and xTd)
                int num_linesearch;
                for (num_linesearch = 0; num_linesearch < max_num_linesearch; num_linesearch++)
                    cond = w_norm_new - w_norm + negsum_xTd - sigma * delta;

                    for (int i = 0; i < l; i++)
                        double exp_xTd = Math.Exp(xTd[i]);
                        exp_wTx_new[i] = exp_wTx[i] * exp_xTd;
                        cond += C[GETI(y, i)] * Math.Log((1 + exp_wTx_new[i]) / (exp_xTd + exp_wTx_new[i]));

                    if (cond <= 0)
                        w_norm = w_norm_new;
                        for (j = 0; j < w_size; j++)
                            w[j] = wpd[j];
                        for (int i = 0; i < l; i++)
                            exp_wTx[i] = exp_wTx_new[i];
                            double tau_tmp = 1 / (1 + exp_wTx[i]);
                            tau[i] = C[GETI(y, i)] * tau_tmp;
                            D[i] = C[GETI(y, i)] * exp_wTx[i] * tau_tmp * tau_tmp;
                        w_norm_new = 0;
                        for (j = 0; j < w_size; j++)
                            wpd[j] = (w[j] + wpd[j]) * 0.5;
                            if (wpd[j] != 0) w_norm_new += Math.Abs(wpd[j]);
                        delta *= 0.5;
                        negsum_xTd *= 0.5;
                        for (int i = 0; i < l; i++)
                            xTd[i] *= 0.5;

                // Recompute some info due to too many line search steps
                if (num_linesearch >= max_num_linesearch)
                    for (int i = 0; i < l; i++)
                        exp_wTx[i] = 0;

                    for (int i = 0; i < w_size; i++)
                        if (w[i] == 0) continue;
                        foreach (Feature x in prob_col.x[i])
                            exp_wTx[x.Index - 1] += w[i] * x.Value;

                    for (int i = 0; i < l; i++)
                        exp_wTx[i] = Math.Exp(exp_wTx[i]);

                // the inner loop stopped after a single cycle: tighten its tolerance
                if (iter == 1) inner_eps *= 0.25;

                Gmax_old = Gmax_new;

                info("iter {0}  #CD cycles {1}", newton_iter, iter);

            info("optimization finished, #iter = {0}", newton_iter);
            if (newton_iter >= max_newton_iter) info("WARNING: reaching max number of iterations");

            // calculate objective value

            // v = sum_j |w_j| + sum_i C_i * log(1 + exp(-y_i * w . x_i))
            double v = 0;
            int nnz = 0;
            for (j = 0; j < w_size; j++)
                if (w[j] != 0)
                    v += Math.Abs(w[j]);
            for (j = 0; j < l; j++)
                if (y[j] == 1)
                    v += C[GETI(y, j)] * Math.Log(1 + 1 / exp_wTx[j]);
                    v += C[GETI(y, j)] * Math.Log(1 + exp_wTx[j]);

            info("Objective value = {0}", v);
            info("#nonzeros/#features = {0}/{1}", nnz, w_size);
         * A coordinate descent algorithm for
         * L1-regularized L2-loss support vector classification
         *  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
         * Given:
         * x, y, Cp, Cn
         * eps is the stopping tolerance
         * solution will be put in w
         * See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
         * @since 1.5
        // Coordinate-descent solver for the L1-regularized, L2-loss SVC objective described in
        // the comment above. The solution is written into w.
        //   prob_col : problem whose x[j] lists the non-zero entries of feature (column) j; each
        //              entry's Index - 1 is used below as an instance index into y[] and b[]
        //   w        : output weights; the first prob_col.n entries are zeroed and then optimized
        //   eps      : stopping tolerance, applied as Gmax_new <= eps * Gnorm1_init
        //   Cp, Cn   : stored as C[2] and C[0]; C[GETI(y, i)] is the loss weight of instance i
        //              (GETI is defined elsewhere - presumably maps y = +1/-1 to 2/0; confirm)
        // NOTE(review): as the body appears here, several places have no visible `else`, `break`,
        // `continue` or counter update (iter, active_size, nnz) where the control flow seems to
        // need one. They are flagged inline; verify against the reference implementation.
        private static void solve_l1r_l2_svc(Problem prob_col, double[] w, double eps, double Cp, double Cn)
            int l = prob_col.l;
            int w_size = prob_col.n;
            int j, s, iter = 0;
            int max_iter = 1000;
            int active_size = w_size;
            int max_num_linesearch = 20;

            // sigma scales delta in the line-search acceptance test (see cond below)
            double sigma = 0.01;
            double d, G_loss, G, H;
            double Gmax_old = Double.PositiveInfinity;
            double Gmax_new, Gnorm1_new;
            double Gnorm1_init = 0; // eclipse moans this variable might not be initialized
            double d_old, d_diff;
            double loss_old = 0; // eclipse moans this variable might not be initialized
            double loss_new;
            double appxcond, cond;

            // index: permutation of feature ids; only the first active_size entries are visited
            int[] index = new int[w_size];
            sbyte[] y = new sbyte[l];
            double[] b = new double[l]; // b = 1-ywTx
            // xj_sq[j]: sum over column j of C_i * (y_i * x_ij)^2, used in the cheap test appxcond
            double[] xj_sq = new double[w_size];

            double[] C = new[] { Cn, 0, Cp };

            // Initial w can be set here.
            for (j = 0; j < w_size; j++)
                w[j] = 0;

            for (j = 0; j < l; j++)
                b[j] = 1;
                // NOTE(review): no `else` is visible between the next two assignments, so as
                // written y[j] always ends up -1 - confirm the intended +1/-1 split.
                if (prob_col.y[j] > 0)
                    y[j] = 1;
                    y[j] = -1;
            for (j = 0; j < w_size; j++)
                index[j] = j;
                xj_sq[j] = 0;
                foreach (Feature xi in prob_col.x[j])
                    int ind = xi.Index - 1;
                    // The feature values are overwritten in place with y_i * x_ij; they are
                    // multiplied back by prob_col.y at the end of the method.
                    xi.Value = xi.Value * y[ind]; // x->value stores yi*xij
                    double val = xi.Value;
                    b[ind] -= w[j] * val;

                    xj_sq[j] += C[GETI(y, ind)] * val * val;

            // Outer loop: one pass over the active features per iteration.
            // NOTE(review): no increment of iter is visible inside this loop.
            while (iter < max_iter)
                Gmax_new = 0;
                Gnorm1_new = 0;

                // Random shuffle of the active part of index (uses the shared `random`).
                for (j = 0; j < active_size; j++)
                    int i = j + random.Next(active_size - j);
                    swap(index, i, j);

                for (s = 0; s < active_size; s++)
                    j = index[s];
                    G_loss = 0;
                    H = 0;

                    // Accumulate first and second derivative of the loss w.r.t. w[j] over the
                    // instances in column j whose b value is positive.
                    foreach (Feature xi in prob_col.x[j])
                        int ind = xi.Index - 1;
                        if (b[ind] > 0)
                            double val = xi.Value;
                            double tmp = C[GETI(y, ind)] * val;
                            G_loss -= tmp * b[ind];
                            H += tmp * val;
                    G_loss *= 2;

                    G = G_loss;
                    H *= 2;
                    // Floor H so the divisions by H below never divide by zero.
                    H = Math.Max(H, 1e-12);

                    // Loss gradient shifted by +1 / -1 (the |w_j| term contributes a slope of
                    // +1 or -1 depending on the sign of w_j).
                    double Gp = G + 1;
                    double Gn = G - 1;
                    double violation = 0;
                    if (w[j] == 0)
                        if (Gp < 0)
                            violation = -Gp;
                        else if (Gn > 0)
                            violation = Gn;
                        // Shrinking test against the previous pass's maximum violation.
                        // NOTE(review): only the swap is visible here; no decrement of
                        // active_size / s and no `continue` - confirm.
                        else if (Gp > Gmax_old / l && Gn < -Gmax_old / l)
                            swap(index, s, active_size);
                    else if (w[j] > 0)
                        // NOTE(review): no `else` visible; as written the second assignment
                        // overrides the first.
                        violation = Math.Abs(Gp);
                        violation = Math.Abs(Gn);

                    Gmax_new = Math.Max(Gmax_new, violation);
                    Gnorm1_new += violation;

                    // obtain Newton direction d
                    // NOTE(review): the last assignment (d = -w[j]) has no visible `else`.
                    if (Gp < H * w[j])
                        d = -Gp / H;
                    else if (Gn > H * w[j])
                        d = -Gn / H;
                        d = -w[j];

                    // Skip coordinates whose step is negligible.
                    if (Math.Abs(d) < 1.0e-12) continue;

                    // Line search on step d: delta is the reference decrease, and d and delta
                    // are halved after a rejected step.
                    double delta = Math.Abs(w[j] + d) - Math.Abs(w[j]) + G * d;
                    d_old = 0;
                    int num_linesearch;
                    for (num_linesearch = 0; num_linesearch < max_num_linesearch; num_linesearch++)
                        d_diff = d_old - d;
                        cond = Math.Abs(w[j] + d) - Math.Abs(w[j]) - sigma * delta;

                        // Cheap test using xj_sq[j]; when it holds, b is updated for the step.
                        // NOTE(review): no `break` is visible after the update.
                        appxcond = xj_sq[j] * d * d + G_loss * d + cond;
                        if (appxcond <= 0)
                            foreach (Feature x in prob_col.x[j])
                                b[x.Index - 1] += d_diff * x.Value;

                        // First trial: compute loss_old and loss_new in one pass while moving b
                        // to the trial point. The second loop below recomputes only loss_new.
                        // NOTE(review): no `else` is visible between the two variants.
                        if (num_linesearch == 0)
                            loss_old = 0;
                            loss_new = 0;
                            foreach (Feature x in prob_col.x[j])
                                int ind = x.Index - 1;
                                if (b[ind] > 0)
                                    loss_old += C[GETI(y, ind)] * b[ind] * b[ind];
                                double b_new = b[ind] + d_diff * x.Value;
                                b[ind] = b_new;
                                if (b_new > 0)
                                    loss_new += C[GETI(y, ind)] * b_new * b_new;
                            loss_new = 0;
                            foreach (Feature x in prob_col.x[j])
                                int ind = x.Index - 1;
                                double b_new = b[ind] + d_diff * x.Value;
                                b[ind] = b_new;
                                if (b_new > 0)
                                    loss_new += C[GETI(y, ind)] * b_new * b_new;

                        // Exact test: the change in loss is added to cond.
                        // NOTE(review): no `break`/`else` is visible around the halving below.
                        cond = cond + loss_new - loss_old;
                        if (cond <= 0)
                            d_old = d;
                            d *= 0.5;
                            delta *= 0.5;

                    w[j] += d;

                    // recompute b[] if line search takes too many steps
                    if (num_linesearch >= max_num_linesearch)
                        for (int i = 0; i < l; i++)
                            b[i] = 1;

                        for (int i = 0; i < w_size; i++)
                            if (w[i] == 0) continue;
                            foreach (Feature x in prob_col.x[i])
                                b[x.Index - 1] -= w[i] * x.Value;

                // Remember the violation norm of the first pass as the reference scale.
                if (iter == 0)
                    Gnorm1_init = Gnorm1_new;
                if (iter % 10 == 0) info(".");

                // Stopping test. On success with a shrunken active set, the set is reset to all
                // features and Gmax_old to infinity.
                // NOTE(review): the `break`/`else`/`continue` that would separate these cases
                // are not visible here.
                if (Gmax_new <= eps * Gnorm1_init)
                    if (active_size == w_size)
                        active_size = w_size;
                        Gmax_old = Double.PositiveInfinity;

                Gmax_old = Gmax_new;

            info("optimization finished, #iter = {0}", iter);
            if (iter >= max_iter) info("\nWARNING: reaching max number of iterations");

            // calculate objective value

            double v = 0;
            int nnz = 0;
            for (j = 0; j < w_size; j++)
                // Undo the in-place y_i * x_ij scaling applied during initialization.
                // NOTE(review): this multiplies by prob_col.y rather than the local +1/-1
                // array y; that is an exact inverse only if prob_col.y holds +1/-1 - confirm.
                foreach (Feature x in prob_col.x[j])
                    x.Value = x.Value * prob_col.y[x.Index - 1]; // restore x->value
                // NOTE(review): nnz is reported below, but no increment is visible here.
                if (w[j] != 0)
                    v += Math.Abs(w[j]);
            for (j = 0; j < l; j++)
                if (b[j] > 0) v += C[GETI(y, j)] * b[j] * b[j];

            info("Objective value = {0}", v);
            info("#nonzeros/#features = {0}/{1}", nnz, w_size);
        /// <summary>
        ///   Trains a two-class linear model for the given problem and returns it
        ///   as a <see cref="LibSvmModel"/> with labels { +1, -1 } and zero bias.
        /// </summary>
        /// <param name="prob">The training problem (inputs, outputs and dimensions).</param>
        /// <param name="parameters">
        ///   Solver choice, base complexity and optional per-class weights. When
        ///   <c>ClassWeights</c> is set, the weight listed for label -1 scales the negative
        ///   cost and the weight listed for label +1 scales the positive cost.
        /// </param>
        /// <returns>The trained model holding the weight vector computed by <c>train_one</c>.</returns>
        public static LibSvmModel train(Problem prob, Parameters parameters)
        {
            double[] w;
            double Cp = parameters.Complexity;
            double Cn = parameters.Complexity;

            if (parameters.ClassWeights != null)
            {
                for (int i = 0; i < parameters.ClassLabels.Count; i++)
                {
                    if (parameters.ClassLabels[i] == -1)
                    {
                        Cn *= parameters.ClassWeights[i];
                    }
                    else if (parameters.ClassLabels[i] == +1)
                    {
                        // Fix: the positive-class weight must scale Cp. Previously it was
                        // applied to Cn, so Cp was never weighted and Cn was weighted twice.
                        Cp *= parameters.ClassWeights[i];
                    }
                }
            }

            train_one(prob, parameters, out w, Cp, Cn);

            return new LibSvmModel()
            {
                Dimension = prob.Dimensions,
                Classes = 2,
                Labels = new[] { +1, -1 },
                Solver = parameters.Solver,
                Weights = w,
                Bias = 0
            };
        }
        /// <summary>
        ///   Trains a single binary linear machine with the solver selected in
        ///   <paramref name="param"/> and returns its weight vector through <paramref name="w"/>.
        /// </summary>
        /// <param name="prob">Training problem; outputs &gt;= 0 are treated as +1, all others as -1.</param>
        /// <param name="param">Solver type and stopping tolerance.</param>
        /// <param name="w">Receives the weights of the trained machine.</param>
        /// <param name="Cp">Cost assigned to the positive class.</param>
        /// <param name="Cn">Cost assigned to the negative class.</param>
        /// <exception cref="NotSupportedException">
        ///   The requested solver type has no learning algorithm mapped in this method.
        /// </exception>
        public static void train_one(Problem prob, Parameters param, out double[] w, double Cp, double Cn)
        {
            double[][] inputs = prob.Inputs;
            int[] labels = prob.Outputs.Apply(x => x >= 0 ? 1 : -1);

            double eps = param.Tolerance;

            int pos = 0;
            for (int i = 0; i < labels.Length; i++)
            {
                if (labels[i] >= 0) pos++;
            }
            int neg = prob.Outputs.Length - pos;

            // Tolerance for the primal solvers: eps scaled by the share of the smaller class.
            // The count is clamped to 1 so the tolerance never becomes zero.
            double primal_solver_tol = eps * Math.Max(Math.Min(pos, neg), 1.0) / prob.Inputs.Length;

            SupportVectorMachine svm = new SupportVectorMachine(prob.Dimensions);
            ISupportVectorMachineLearning teacher;

            switch (param.Solver)
            {
                case LibSvmSolverType.L2RegularizedLogisticRegression:

                    // l2r_lr_fun
                    teacher = new ProbabilisticNewtonMethod(svm, inputs, labels)
                    {
                        PositiveWeight = Cp,
                        NegativeWeight = Cn,
                        Tolerance = primal_solver_tol
                    }; break;

                case LibSvmSolverType.L2RegularizedL2LossSvc:

                    // fun_obj=new l2r_l2_svc_fun(prob, C);
                    teacher = new LinearNewtonMethod(svm, inputs, labels)
                    {
                        PositiveWeight = Cp,
                        NegativeWeight = Cn,
                        Tolerance = primal_solver_tol
                    }; break;

                case LibSvmSolverType.L2RegularizedL2LossSvcDual:

                    // solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
                    teacher = new LinearCoordinateDescent(svm, inputs, labels)
                    {
                        Loss = Loss.L2,
                        PositiveWeight = Cp,
                        NegativeWeight = Cn,
                    }; break;

                case LibSvmSolverType.L2RegularizedL1LossSvcDual:

                    // solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
                    teacher = new LinearCoordinateDescent(svm, inputs, labels)
                    {
                        Loss = Loss.L1,
                        PositiveWeight = Cp,
                        NegativeWeight = Cn,
                    }; break;

                case LibSvmSolverType.L1RegularizedLogisticRegression:

                    // solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
                    teacher = new ProbabilisticCoordinateDescent(svm, inputs, labels)
                    {
                        PositiveWeight = Cp,
                        NegativeWeight = Cn,
                        Tolerance = primal_solver_tol
                    }; break;

                case LibSvmSolverType.L2RegularizedLogisticRegressionDual:

                    // solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
                    teacher = new ProbabilisticDualCoordinateDescent(svm, inputs, labels)
                    {
                        PositiveWeight = Cp,
                        NegativeWeight = Cn,
                        Tolerance = primal_solver_tol,
                    }; break;

                default:
                    // Fail with a clear message. Previously an unmapped solver left the
                    // teacher null and surfaced as a NullReferenceException on Run().
                    throw new NotSupportedException("Unsupported solver type: " + param.Solver);
            }

            Trace.WriteLine("Training " + param.Solver);
            // run the learning algorithm
            var sw = Stopwatch.StartNew();
            double error = teacher.Run();

            // save the solution
            w = svm.ToWeights();

            Trace.WriteLine(String.Format("Finished {0}: {1} in {2}",
                param.Solver, error, sw.Elapsed));
        }
         * A coordinate descent algorithm for
         * L1-loss and L2-loss epsilon-SVR dual problem
         *  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
         *    s.t.      -upper_bound_i <= \beta_i <= upper_bound_i,
         *  where Qij = xi^T xj and
         *  diag(lambda) is the diagonal matrix with lambda_i on its diagonal
         * In L1-SVM case:
         *         upper_bound_i = C
         *         lambda_i = 0
         * In L2-SVM case:
         *         upper_bound_i = INF
         *         lambda_i = 1/(2*C)
         * Given:
         * x, y, p, C
         * eps is the stopping tolerance
         * solution will be put in w
         * See Algorithm 4 of Ho and Lin, 2012
        // Dual coordinate-descent solver for L1-loss / L2-loss epsilon-SVR (see the comment
        // above). The primal weights are maintained in w alongside the dual variables beta.
        //   prob  : problem whose x[i] lists the non-zero features of instance i; each entry's
        //           Index - 1 is used as a feature index into w
        //   w     : output weights; the first prob.n entries are zeroed and then updated
        //   param : supplies C, p, eps and the solver type (selects the L1-loss settings)
        // NOTE(review): as the body appears here, several places have no visible `else`, `break`,
        // `continue` or counter update (iter, active_size) where the control flow seems to need
        // one. They are flagged inline; verify against the reference implementation.
        private static void solve_l2r_l1l2_svr(Problem prob, double[] w, Parameter param)
            int l = prob.l;
            double C = param.C;
            double p = param.p;
            int w_size = prob.n;
            double eps = param.eps;
            int i, s, iter = 0;
            int max_iter = 1000;
            int active_size = l;
            // index: permutation of instance ids; only the first active_size entries are visited
            int[] index = new int[l];

            double d, G, H;
            double Gmax_old = Double.PositiveInfinity;
            double Gmax_new, Gnorm1_new;
            double Gnorm1_init = 0; // initialize to 0 to get rid of Eclipse warning/error
            double[] beta = new double[l];
            // QD[i]: squared norm of instance i (sum of val * val over its features)
            double[] QD = new double[l];
            double[] y = prob.y;

            // L2R_L2LOSS_SVR_DUAL
            // Single-element arrays indexed through GETI_SVR(i), which is defined elsewhere -
            // presumably it always returns 0; confirm.
            double[] lambda = new double[] { 0.5 / C };
            double[] upper_bound = new double[] { Double.PositiveInfinity };

            // The L1-loss variant uses no diagonal term and a finite box [-C, C].
            if (param.solverType.getId() == SolverType.L2R_L1LOSS_SVR_DUAL)
                lambda[0] = 0;
                upper_bound[0] = C;

            // Initial beta can be set here. Note that
            // -upper_bound <= beta[i] <= upper_bound
            for (i = 0; i < l; i++)
                beta[i] = 0;

            for (i = 0; i < w_size; i++)
                w[i] = 0;
            for (i = 0; i < l; i++)
                QD[i] = 0;
                foreach (Feature xi in prob.x[i])
                    double val = xi.Value;
                    QD[i] += val * val;
                    w[xi.Index - 1] += beta[i] * val;

                index[i] = i;

            // Outer loop: one pass over the active instances per iteration.
            // NOTE(review): no increment of iter is visible inside this loop.
            while (iter < max_iter)
                Gmax_new = 0;
                Gnorm1_new = 0;

                // Random shuffle of the active part of index (uses the shared `random`).
                for (i = 0; i < active_size; i++)
                    int j = i + random.Next(active_size - i);
                    swap(index, i, j);

                for (s = 0; s < active_size; s++)
                    i = index[s];
                    // G = -y_i + lambda * beta_i + w^T x_i (the dot product is added below);
                    // H = x_i^T x_i + lambda.
                    G = -y[i] + lambda[GETI_SVR(i)] * beta[i];
                    H = QD[i] + lambda[GETI_SVR(i)];

                    foreach (Feature xi in prob.x[i])
                        int ind = xi.Index - 1;
                        double val = xi.Value;
                        G += val * w[ind];

                    // Gradient shifted by +p / -p for the two sides of the |beta_i| term.
                    double Gp = G + p;
                    double Gn = G - p;
                    double violation = 0;
                    // Violation and shrinking depend on where beta_i sits: at zero, at either
                    // bound, or strictly inside.
                    // NOTE(review): in the shrinking branches only the swap is visible; no
                    // decrement of active_size / s and no `continue` - confirm.
                    if (beta[i] == 0)
                        if (Gp < 0)
                            violation = -Gp;
                        else if (Gn > 0)
                            violation = Gn;
                        else if (Gp > Gmax_old && Gn < -Gmax_old)
                            swap(index, s, active_size);
                    else if (beta[i] >= upper_bound[GETI_SVR(i)])
                        if (Gp > 0)
                            violation = Gp;
                        else if (Gp < -Gmax_old)
                            swap(index, s, active_size);
                    else if (beta[i] <= -upper_bound[GETI_SVR(i)])
                        if (Gn < 0)
                            violation = -Gn;
                        else if (Gn > Gmax_old)
                            swap(index, s, active_size);
                    else if (beta[i] > 0)
                        // NOTE(review): no `else` visible; as written the second assignment
                        // overrides the first.
                        violation = Math.Abs(Gp);
                        violation = Math.Abs(Gn);

                    Gmax_new = Math.Max(Gmax_new, violation);
                    Gnorm1_new += violation;

                    // obtain Newton direction d
                    // NOTE(review): the last assignment (d = -beta[i]) has no visible `else`.
                    if (Gp < H * beta[i])
                        d = -Gp / H;
                    else if (Gn > H * beta[i])
                        d = -Gn / H;
                        d = -beta[i];

                    // Skip coordinates whose step is negligible.
                    if (Math.Abs(d) < 1.0e-12) continue;

                    // Take the step, clip beta_i to [-upper_bound, upper_bound], and use the
                    // step actually taken to update w.
                    double beta_old = beta[i];
                    beta[i] = Math.Min(Math.Max(beta[i] + d, -upper_bound[GETI_SVR(i)]), upper_bound[GETI_SVR(i)]);
                    d = beta[i] - beta_old;

                    if (d != 0)
                        foreach (Feature xi in prob.x[i])
                            w[xi.Index - 1] += d * xi.Value;

                // Remember the violation norm of the first pass as the reference scale.
                if (iter == 0) Gnorm1_init = Gnorm1_new;
                if (iter % 10 == 0) info(".");

                // Stopping test on the 1-norm of the violations. On success with a shrunken
                // active set, the set is reset to all instances and Gmax_old to infinity.
                // NOTE(review): the `break`/`else`/`continue` that would separate these cases
                // are not visible here.
                if (Gnorm1_new <= eps * Gnorm1_init)
                    if (active_size == l)
                        active_size = l;
                        Gmax_old = Double.PositiveInfinity;

                Gmax_old = Gmax_new;

            // NOTE(review): the message below starts with a literal 'n' ("noptimization"),
            // which looks like a lost newline escape - confirm the intended text.
            info("noptimization finished, #iter = {0}", iter);
            if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n");

            // calculate objective value
            // v = 0.5 * w^T w + sum_i ( p * |beta_i| - y_i * beta_i + 0.5 * lambda * beta_i^2 );
            // nSV counts the non-zero beta_i.
            double v = 0;
            int nSV = 0;
            for (i = 0; i < w_size; i++)
                v += w[i] * w[i];
            v = 0.5 * v;
            for (i = 0; i < l; i++)
                v += p * Math.Abs(beta[i]) - y[i] * beta[i] + 0.5 * lambda[GETI_SVR(i)] * beta[i] * beta[i];
                if (beta[i] != 0) nSV++;

            info("Objective value = {0}", v);
            info("nSV = {0}", nSV);
 // Constructor: forwards prob and C to the base-class constructor and stores p on this
 // instance. (p is presumably the epsilon-insensitivity parameter of the SVR loss - confirm.)
 public L2R_L2_SvrFunction( Problem prob, double[] C, double p ) : base(prob, C) {
     this.p = p;
         * A coordinate descent algorithm for
         * the dual of L2-regularized logistic regression problems
         *  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i) ,
         *     s.t.      0 <= \alpha_i <= upper_bound_i,
         *  where Qij = yi yj xi^T xj and
         *  upper_bound_i = Cp if y_i = 1
         *  upper_bound_i = Cn if y_i = -1
         * Given:
         * x, y, Cp, Cn
         * eps is the stopping tolerance
         * solution will be put in w
         * See Algorithm 5 of Yu et al., MLJ 2010
         * @since 1.7
        // Dual coordinate-descent solver for L2-regularized logistic regression (see the comment
        // above). Each coordinate is solved by a one-dimensional Newton iteration; the primal
        // weights are maintained in w.
        //   prob   : problem whose x[i] lists the non-zero features of instance i; each entry's
        //            Index - 1 is used as a feature index into w
        //   w      : output weights; the first prob.n entries are zeroed and then updated
        //   eps    : outer stopping tolerance (Gmax < eps); also caps innereps_min
        //   Cp, Cn : stored as upper_bound[2] and upper_bound[0], selected by GETI(y, i)
        //            (GETI is defined elsewhere - presumably maps y = +1/-1 to 2/0; confirm)
        // NOTE(review): as the body appears here, several places have no visible `else`, `break`
        // or counter update (iter, inner_iter, newton_iter) where the control flow seems to need
        // one. They are flagged inline; verify against the reference implementation.
        private static void solve_l2r_lr_dual(Problem prob, double[] w, double eps, double Cp, double Cn)
            int l = prob.l;
            int w_size = prob.n;
            int i, s, iter = 0;
            // xTx[i]: squared norm of instance i
            double[] xTx = new double[l];
            int max_iter = 1000;
            int[] index = new int[l];
            double[] alpha = new double[2 * l]; // store alpha and C - alpha
            sbyte[] y = new sbyte[l];
            int max_inner_iter = 100; // for inner Newton
            // innereps: tolerance of the inner Newton loop; it is tightened by a factor of 10
            // (never below innereps_min) at the end of an outer pass.
            double innereps = 1e-2;
            double innereps_min = Math.Min(1e-8, eps);
            double[] upper_bound = new[] { Cn, 0, Cp };

            for (i = 0; i < l; i++)
                // NOTE(review): no `else` is visible between the next two assignments, so as
                // written y[i] always ends up -1 - confirm the intended +1/-1 split.
                if (prob.y[i] > 0)
                    y[i] = +1;
                    y[i] = -1;

            // Initial alpha can be set here. Note that
            // 0 < alpha[i] < upper_bound[GETI(i)]
            // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
            for (i = 0; i < l; i++)
                alpha[2 * i] = Math.Min(0.001 * upper_bound[GETI(y, i)], 1e-8);
                alpha[2 * i + 1] = upper_bound[GETI(y, i)] - alpha[2 * i];

            for (i = 0; i < w_size; i++)
                w[i] = 0;
            // Build xTx and the initial w = sum_i y_i * alpha_i * x_i.
            for (i = 0; i < l; i++)
                xTx[i] = 0;
                foreach (Feature xi in prob.x[i])
                    double val = xi.Value;
                    xTx[i] += val * val;
                    w[xi.Index - 1] += y[i] * alpha[2 * i] * val;
                index[i] = i;

            // Outer loop: one pass over all instances per iteration (no shrinking here).
            // NOTE(review): no increment of iter is visible inside this loop.
            while (iter < max_iter)
                // Random shuffle of the visiting order (uses the shared `random`).
                for (i = 0; i < l; i++)
                    int j = i + random.Next(l - i);
                    swap(index, i, j);
                int newton_iter = 0;
                double Gmax = 0;
                for (s = 0; s < l; s++)
                    i = index[s];
                    sbyte yi = y[i];
                    double C = upper_bound[GETI(y, i)];
                    double ywTx = 0, xisq = xTx[i];
                    foreach (Feature xi in prob.x[i])
                        ywTx += w[xi.Index - 1] * xi.Value;
                    ywTx *= y[i];
                    double a = xisq, b = ywTx;

                    // Decide to minimize g_1(z) or g_2(z)
                    // ind1 is the variable being optimized and ind2 its complement (C - z);
                    // sign is flipped when the roles of alpha and C - alpha are swapped.
                    int ind1 = 2 * i, ind2 = 2 * i + 1, sign = 1;
                    if (0.5 * a * (alpha[ind2] - alpha[ind1]) + b < 0)
                        ind1 = 2 * i + 1;
                        ind2 = 2 * i;
                        sign = -1;

                    //  g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
                    double alpha_old = alpha[ind1];
                    double z = alpha_old;
                    // Start from a smaller z when the current value lies in the upper half of (0, C).
                    if (C - z < 0.5 * C) z = 0.1 * z;
                    // gp is the first derivative g_t'(z); its magnitude feeds the outer stopping test.
                    double gp = a * (z - alpha_old) + sign * b + Math.Log(z / (C - z));
                    Gmax = Math.Max(Gmax, Math.Abs(gp));

                    // Newton method on the sub-problem
                    // NOTE(review): no increment of inner_iter (or newton_iter) is visible in
                    // this loop, and the two assignments to z below have no visible `else`.
                    const double eta = 0.1; // xi in the paper
                    int inner_iter = 0;
                    while (inner_iter <= max_inner_iter)
                        if (Math.Abs(gp) < innereps) break;
                        // gpp is the second derivative g_t''(z) = a + C / ((C - z) * z).
                        double gpp = a + C / (C - z) / z;
                        double tmpz = z - gp / gpp;
                        if (tmpz <= 0)
                            z *= eta;
                            // tmpz in (0, C)
                            z = tmpz;
                        gp = a * (z - alpha_old) + sign * b + Math.Log(z / (C - z));

                    // Commit the new value and update w by the change in alpha.
                    if (inner_iter > 0) // update w
                        alpha[ind1] = z;
                        alpha[ind2] = C - z;
                        foreach (Feature xi in prob.x[i])
                            w[xi.Index - 1] += sign * (z - alpha_old) * yi * xi.Value;

                if (iter % 10 == 0) info(".");

                // Outer stopping test on the largest |g'| seen in this pass.
                if (Gmax < eps) break;

                // Tighten the inner tolerance when few Newton steps were taken in this pass.
                if (newton_iter <= l / 10)
                    innereps = Math.Max(innereps_min, 0.1 * innereps);

            // NOTE(review): the message below starts with a literal 'n' ("noptimization"),
            // which looks like a lost newline escape - confirm the intended text.
            info("noptimization finished, #iter = {0}", iter);
            if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n");

            // calculate objective value
            // v = 0.5 * w^T w + sum_i ( a_i log a_i + (C_i - a_i) log (C_i - a_i) - C_i log C_i )
            double v = 0;
            for (i = 0; i < w_size; i++)
                v += w[i] * w[i];
            v *= 0.5;
            for (i = 0; i < l; i++)
                v += alpha[2 * i] * Math.Log(alpha[2 * i]) + alpha[2 * i + 1] * Math.Log(alpha[2 * i + 1]) - upper_bound[GETI(y, i)]
                    * Math.Log(upper_bound[GETI(y, i)]);
            info("Objective value = {0}", v);
        /// <summary>
        ///   Trains one binary linear machine with the learner built by <c>create</c>,
        ///   traces the hinge loss on the training data and the elapsed time, and
        ///   returns the learned weights through <paramref name="w"/>.
        /// </summary>
        /// <param name="prob">Training problem; outputs &gt;= 0 are treated as +1, all others as -1.</param>
        /// <param name="param">Solver settings forwarded to <c>create</c>.</param>
        /// <param name="w">Receives the weights of the trained machine.</param>
        /// <param name="Cp">Cost assigned to the positive class.</param>
        /// <param name="Cn">Cost assigned to the negative class.</param>
        public static void train_one(Problem prob, Parameters param, out double[] w, double Cp, double Cn)
        {
            double[][] samples = prob.Inputs;

            // Binarize the outputs to +1 / -1.
            int[] signs = prob.Outputs.Apply(value => value >= 0 ? 1 : -1);

            // Build the learner that corresponds to the requested solver.
            var learner = create(param, Cp, Cn, samples, signs);

            Trace.WriteLine("Training " + param.Solver);

            // Time the learning run.
            var timer = Stopwatch.StartNew();
            SupportVectorMachine machine = learner.Learn(samples, signs);

            // Hinge loss of the trained machine on its own training data.
            var hinge = new HingeLoss(signs);
            double error = hinge.Loss(machine.Score(samples));

            // Hand the solution back to the caller.
            w = machine.ToWeights();

            string report = String.Format("Finished {0}: {1} in {2}", param.Solver, error, timer.Elapsed);
            Trace.WriteLine(report);
        }
