예제 #1
0
        /**
         * Runs nr_fold-fold cross validation on prob.
         *
         * The instances are shuffled once and split into nr_fold contiguous slices of the
         * shuffled order. For every slice a model is trained on all other instances and
         * then used to predict the instances of that slice.
         *
         * @param prob problem whose first prob.l instances are cross validated
         * @param param training parameters, passed unchanged to train
         * @param nr_fold requested number of folds, must be positive; a value larger than
         *        prob.l is reduced to prob.l (leave-one-out), as the reference LIBLINEAR does
         * @param target predicted classes; target[i] receives the prediction for instance i,
         *        so it needs at least prob.l elements
         * @throws ArgumentNullException if prob, param or target is null
         * @throws ArgumentOutOfRangeException if nr_fold is not positive
         * @throws ArgumentException if target has fewer than prob.l elements
         */
        public static void crossValidation(Problem prob, Parameter param, int nr_fold, double[] target)
        {
            if (prob == null) throw new ArgumentNullException("prob");
            if (param == null) throw new ArgumentNullException("param");
            if (target == null) throw new ArgumentNullException("target");
            // nr_fold == 0 would divide by zero below; a negative value would silently do nothing or fail on allocation
            if (nr_fold <= 0) throw new ArgumentOutOfRangeException("nr_fold", "number of folds must be positive");

            int i;
            int l = prob.l;

            // fail before any training instead of with an index error after the first fold
            if (target.Length < l) throw new ArgumentException("target must hold at least prob.l elements", "target");

            // More folds than instances only produces empty folds, each of which would still
            // train a model on the full data without predicting anything. Skip that work.
            // (l == 0 is left alone so that train reports the empty problem as before.)
            if (l > 0 && nr_fold > l)
                nr_fold = l;

            int[] fold_start = new int[nr_fold + 1];
            int[] perm = new int[l];

            // random permutation of the instance indices
            for (i = 0; i < l; i++)
                perm[i] = i;
            for (i = 0; i < l; i++)
            {
                int j = i + random.Next(l - i);
                swap(perm, i, j);
            }

            // fold i covers perm[fold_start[i] .. fold_start[i + 1] - 1];
            // the product is computed as long because i * l can exceed int.MaxValue
            for (i = 0; i <= nr_fold; i++)
                fold_start[i] = (int)((long)i * l / nr_fold);

            for (i = 0; i < nr_fold; i++)
            {
                int begin = fold_start[i];
                int end = fold_start[i + 1];
                int j, k;

                // training set: every instance outside [begin, end)
                Problem subprob = new Problem();
                subprob.bias = prob.bias;
                subprob.n = prob.n;
                subprob.l = l - (end - begin);
                subprob.x = new Feature[subprob.l][];
                subprob.y = new double[subprob.l];

                k = 0;
                for (j = 0; j < begin; j++)
                {
                    subprob.x[k] = prob.x[perm[j]];
                    subprob.y[k] = prob.y[perm[j]];
                    ++k;
                }
                for (j = end; j < l; j++)
                {
                    subprob.x[k] = prob.x[perm[j]];
                    subprob.y[k] = prob.y[perm[j]];
                    ++k;
                }

                Model submodel = train(subprob, param);

                // predictions are written back at the original (unshuffled) positions
                for (j = begin; j < end; j++)
                    target[perm[j]] = predict(submodel, prob.x[perm[j]]);
            }
        }
예제 #2
0
        /**
         * Parses the command line of the training tool into the members of this instance.
         *
         * Layout: [options] input_file [model_file]. Options start with '-'; every option
         * except -q takes the following argument as its value. The first argument that does
         * not start with '-' ends option parsing and is taken as the input file name. When
         * no model file is given, it is the input file name without its directory part
         * (everything up to the last '/') plus ".model".
         *
         * Sets Parameter, Bias, cross_validation, nr_fold, inputFilename and modelFilename.
         * Usage errors are reported through exit_with_help.
         * NOTE(review): the code relies on exit_with_help not returning - confirm.
         *
         * @param argv command line arguments without the program name
         * @throws InvalidOperationException if no default eps is known for the chosen solver
         */
        internal void parse_command_line(string[] argv)
        {
            int i;

            // eps: positive infinity marks "not given"; the solver specific default is
            // filled in at the end of this method
            Parameter = new Parameter(SolverType.getById(SolverType.L2R_L2LOSS_SVC_DUAL), 1, Double.PositiveInfinity, 0.1);
            // default values
            Bias = -1;
            cross_validation = false;

            // Command line numbers are machine-readable: parse them independently of the
            // current culture, and without thousands separators so "0,5" is an error
            // rather than 5.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // parse options
            for (i = 0; i < argv.Length; i++)
            {
                string option = argv[i];

                // an empty argument is not an option; it is rejected below as a file name
                if (string.IsNullOrEmpty(option) || option[0] != '-') break;
                if (++i >= argv.Length) exit_with_help();

                // a lone "-" carries no option letter; '\0' sends it to the default branch
                char flag = option.Length > 1 ? option[1] : '\0';
                switch (flag)
                {
                    case 's':
                        Parameter.solverType = SolverType.getById(Linear.atoi(argv[i]));
                        break;
                    case 'c':
                        Parameter.setC(Linear.atof(argv[i]));
                        break;
                    case 'p':
                        Parameter.setP(Linear.atof(argv[i]));
                        break;
                    case 'e':
                        Parameter.setEps(Linear.atof(argv[i]));
                        break;
                    case 'B':
                        Bias = Linear.atof(argv[i]);
                        break;
                    case 'w':
                        {
                            // "-w<label> <weight>": the label is glued to the option itself
                            int weightLabel = int.Parse(option.Substring(2), System.Globalization.NumberStyles.Integer, invariant);
                            double weight = double.Parse(argv[i], System.Globalization.NumberStyles.Float, invariant);
                            Parameter.weightLabel = addToArray(Parameter.weightLabel, weightLabel);
                            Parameter.weight = addToArray(Parameter.weight, weight);
                            break;
                        }
                    case 'v':
                        cross_validation = true;
                        nr_fold = int.Parse(argv[i], System.Globalization.NumberStyles.Integer, invariant);
                        if (nr_fold < 2)
                        {
                            Console.Error.WriteLine("n-fold cross validation: n must >= 2");
                            exit_with_help();
                        }
                        break;
                    case 'q':
                        // -q takes no value: give back the argument consumed above
                        i--;
                        Linear.disableDebugOutput();
                        break;
                    default:
                        Console.Error.WriteLine("unknown option");
                        exit_with_help();
                        break;
                }
            }

            // determine filenames
            if (i >= argv.Length || string.IsNullOrEmpty(argv[i])) exit_with_help();

            inputFilename = argv[i];

            if (i < argv.Length - 1)
                modelFilename = argv[i + 1];
            else
            {
                // LastIndexOf yields -1 when there is no '/', so the increment gives 0
                // and the whole name is kept
                int p = argv[i].LastIndexOf('/');
                ++p;
                modelFilename = argv[i].Substring(p) + ".model";
            }

            // no -e given: pick the default stopping tolerance of the selected solver
            if (Parameter.eps == Double.PositiveInfinity)
            {
                switch (Parameter.solverType.getId())
                {
                    case SolverType.L2R_LR:
                    case SolverType.L2R_L2LOSS_SVC:
                    case SolverType.L1R_L2LOSS_SVC:
                    case SolverType.L1R_LR:
                        Parameter.setEps(0.01);
                        break;
                    case SolverType.L2R_L2LOSS_SVR:
                        Parameter.setEps(0.001);
                        break;
                    case SolverType.L2R_L2LOSS_SVC_DUAL:
                    case SolverType.L2R_L1LOSS_SVC_DUAL:
                    case SolverType.MCSVM_CS:
                    case SolverType.L2R_LR_DUAL:
                    case SolverType.L2R_L1LOSS_SVR_DUAL:
                    case SolverType.L2R_L2LOSS_SVR_DUAL:
                        Parameter.setEps(0.1);
                        break;
                    default:
                        throw new InvalidOperationException("unknown solver type: " + Parameter.solverType);
                }
            }
        }
예제 #3
0
        /**
         * A coordinate descent algorithm for the
         * L1-loss and L2-loss epsilon-SVR dual problems
         *
         *  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta + p \sum_{i=1}^l|\beta_i| - \sum_{i=1}^l yi\beta_i,
         *    s.t.      -upper_bound_i <= \beta_i <= upper_bound_i,
         *
         *  where Qij = xi^T xj and diag(lambda) is the diagonal matrix of the lambda_i
         *
         * In L1-SVM case (solver L2R_L1LOSS_SVR_DUAL):
         *         upper_bound_i = C
         *         lambda_i = 0
         * In L2-SVM case (any other solver type):
         *         upper_bound_i = INF
         *         lambda_i = 1/(2*C)
         *
         * Given:
         * x, y (from prob), p, C, eps and the solver type (from param)
         * eps is the stopping tolerance, relative to the violation measured in the first pass
         *
         * The solution w = \sum_i \beta_i xi is written to w; its first prob.n entries
         * are overwritten. At most 1000 passes over the data are made.
         *
         * See Algorithm 4 of Ho and Lin, 2012
         */
        private static void solve_l2r_l1l2_svr(Problem prob, double[] w, Parameter param)
        {
            int l = prob.l;
            double C = param.C;
            double p = param.p;
            int w_size = prob.n;
            double eps = param.eps;
            int i, s, iter = 0;
            int max_iter = 1000;
            int active_size = l;
            int[] index = new int[l];

            double d, G, H;
            double Gmax_old = Double.PositiveInfinity;
            double Gmax_new, Gnorm1_new;
            double Gnorm1_init = 0; // overwritten after the first pass; the 0 only satisfies definite assignment
            double[] beta = new double[l];
            double[] QD = new double[l];
            double[] y = prob.y;

            // L2R_L2LOSS_SVR_DUAL
            // Both arrays have a single entry shared by all instances.
            // NOTE(review): this presumes GETI_SVR always returns 0 - confirm.
            double[] lambda = new double[] { 0.5 / C };
            double[] upper_bound = new double[] { Double.PositiveInfinity };


            if (param.solverType.getId() == SolverType.L2R_L1LOSS_SVR_DUAL)
            {
                lambda[0] = 0;
                upper_bound[0] = C;
            }

            // Initial beta can be set here. Note that
            // -upper_bound <= beta[i] <= upper_bound
            for (i = 0; i < l; i++)
                beta[i] = 0;

            for (i = 0; i < w_size; i++)
                w[i] = 0;
            for (i = 0; i < l; i++)
            {
                // QD[i] = xi^T xi; w accumulates sum_i beta_i xi (feature indices are 1-based)
                QD[i] = 0;
                foreach (Feature xi in prob.x[i])
                {
                    double val = xi.Value;
                    QD[i] += val * val;
                    w[xi.Index - 1] += beta[i] * val;
                }

                index[i] = i;
            }

            while (iter < max_iter)
            {
                Gmax_new = 0;
                Gnorm1_new = 0;

                // visit the active instances in a fresh random order each pass
                for (i = 0; i < active_size; i++)
                {
                    int j = i + random.Next(active_size - i);
                    swap(index, i, j);
                }

                for (s = 0; s < active_size; s++)
                {
                    i = index[s];
                    // G: gradient of the smooth part w.r.t. beta_i, H: its second derivative
                    G = -y[i] + lambda[GETI_SVR(i)] * beta[i];
                    H = QD[i] + lambda[GETI_SVR(i)];

                    foreach (Feature xi in prob.x[i])
                    {
                        int ind = xi.Index - 1;
                        double val = xi.Value;
                        G += val * w[ind];
                    }

                    // one-sided derivatives including the p*|beta_i| term
                    double Gp = G + p;
                    double Gn = G - p;
                    double violation = 0;
                    if (beta[i] == 0)
                    {
                        if (Gp < 0)
                            violation = -Gp;
                        else if (Gn > 0)
                            violation = Gn;
                        else if (Gp > Gmax_old && Gn < -Gmax_old)
                        {
                            // shrink: move instance i out of the active set and
                            // re-examine the element swapped into slot s
                            active_size--;
                            swap(index, s, active_size);
                            s--;
                            continue;
                        }
                    }
                    else if (beta[i] >= upper_bound[GETI_SVR(i)])
                    {
                        if (Gp > 0)
                            violation = Gp;
                        else if (Gp < -Gmax_old)
                        {
                            active_size--;
                            swap(index, s, active_size);
                            s--;
                            continue;
                        }
                    }
                    else if (beta[i] <= -upper_bound[GETI_SVR(i)])
                    {
                        if (Gn < 0)
                            violation = -Gn;
                        else if (Gn > Gmax_old)
                        {
                            active_size--;
                            swap(index, s, active_size);
                            s--;
                            continue;
                        }
                    }
                    else if (beta[i] > 0)
                        violation = Math.Abs(Gp);
                    else
                        violation = Math.Abs(Gn);

                    Gmax_new = Math.Max(Gmax_new, violation);
                    Gnorm1_new += violation;

                    // obtain Newton direction d
                    if (Gp < H * beta[i])
                        d = -Gp / H;
                    else if (Gn > H * beta[i])
                        d = -Gn / H;
                    else
                        d = -beta[i];

                    if (Math.Abs(d) < 1.0e-12) continue;

                    // take the step, clipped to the box, and keep w in sync with beta
                    double beta_old = beta[i];
                    beta[i] = Math.Min(Math.Max(beta[i] + d, -upper_bound[GETI_SVR(i)]), upper_bound[GETI_SVR(i)]);
                    d = beta[i] - beta_old;

                    if (d != 0)
                    {
                        foreach (Feature xi in prob.x[i])
                        {
                            w[xi.Index - 1] += d * xi.Value;
                        }
                    }
                }

                if (iter == 0) Gnorm1_init = Gnorm1_new;
                iter++;
                if (iter % 10 == 0) info(".");

                if (Gnorm1_new <= eps * Gnorm1_init)
                {
                    if (active_size == l)
                        break;
                    else
                    {
                        // converged on the shrunken set only: re-check with all instances
                        active_size = l;
                        info("*");
                        Gmax_old = Double.PositiveInfinity;
                        continue;
                    }
                }

                Gmax_old = Gmax_new;
            }

            // the message starts on a new line so it does not run into the progress dots
            info("\noptimization finished, #iter = {0}", iter);
            if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n");

            // calculate objective value
            double v = 0;
            int nSV = 0;
            for (i = 0; i < w_size; i++)
                v += w[i] * w[i];
            v = 0.5 * v;
            for (i = 0; i < l; i++)
            {
                v += p * Math.Abs(beta[i]) - y[i] * beta[i] + 0.5 * lambda[GETI_SVR(i)] * beta[i] * beta[i];
                if (beta[i] != 0) nSV++;
            }


            info("Objective value = {0}", v);
            info("nSV = {0}", nSV);
        }
예제 #4
0
        /**
         * Trains a single weight vector by handing prob to the solver selected in param.
         *
         * @param prob training data; classification solvers treat labels > 0 as the positive class
         * @param param supplies the solver type, eps and, for the regression solvers, C and p
         * @param w receives the trained weights
         * @param Cp penalty used for instances with a positive label
         * @param Cn penalty used for all other instances
         * @throws InvalidOperationException if the solver type is not handled here
         */
        private static void train_one(Problem prob, Parameter param, double[] w, double Cp, double Cn)
        {
            double eps = param.eps;

            // class balance of the problem
            int positives = 0;
            for (int idx = 0; idx < prob.l; idx++)
            {
                if (prob.y[idx] > 0)
                    positives++;
            }
            int negatives = prob.l - positives;
            int minority = Math.Min(positives, negatives);

            // tolerance for the primal solvers, scaled by the share of the smaller class
            double primal_solver_tol = eps * Math.Max(minority, 1) / prob.l;

            var solverId = param.solverType.getId();
            switch (solverId)
            {
                case SolverType.L2R_LR:
                case SolverType.L2R_L2LOSS_SVC:
                    {
                        // per-instance penalty: Cp for positive labels, Cn otherwise
                        double[] penalties = new double[prob.l];
                        for (int idx = 0; idx < prob.l; idx++)
                            penalties[idx] = prob.y[idx] > 0 ? Cp : Cn;

                        IFunction objective;
                        if (solverId == SolverType.L2R_LR)
                            objective = new L2R_LrFunction(prob, penalties);
                        else
                            objective = new L2R_L2_SvcFunction(prob, penalties);

                        Tron optimizer = new Tron(objective, primal_solver_tol);
                        optimizer.tron(w);
                        break;
                    }

                case SolverType.L2R_L2LOSS_SVC_DUAL:
                    solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.getById(SolverType.L2R_L2LOSS_SVC_DUAL));
                    break;

                case SolverType.L2R_L1LOSS_SVC_DUAL:
                    solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.getById(SolverType.L2R_L1LOSS_SVC_DUAL));
                    break;

                case SolverType.L1R_L2LOSS_SVC:
                    {
                        // this solver is handed the transposed problem
                        Problem transposed = transpose(prob);
                        solve_l1r_l2_svc(transposed, w, primal_solver_tol, Cp, Cn);
                        break;
                    }

                case SolverType.L1R_LR:
                    {
                        Problem transposed = transpose(prob);
                        solve_l1r_lr(transposed, w, primal_solver_tol, Cp, Cn);
                        break;
                    }

                case SolverType.L2R_LR_DUAL:
                    solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
                    break;

                case SolverType.L2R_L2LOSS_SVR:
                    {
                        // regression: one common penalty, Cp and Cn are not used
                        double[] penalties = new double[prob.l];
                        for (int idx = 0; idx < prob.l; idx++)
                            penalties[idx] = param.C;

                        IFunction objective = new L2R_L2_SvrFunction(prob, penalties, param.p);
                        Tron optimizer = new Tron(objective, param.eps);
                        optimizer.tron(w);
                        break;
                    }

                case SolverType.L2R_L1LOSS_SVR_DUAL:
                case SolverType.L2R_L2LOSS_SVR_DUAL:
                    solve_l2r_l1l2_svr(prob, w, param);
                    break;

                default:
                    throw new InvalidOperationException("unknown solver type: " + param.solverType);
            }
        }
예제 #5
0
        /**
         * Trains a model for prob using the solver and settings in param.
         *
         * Regression solvers (L2R_L2LOSS_SVR and the two SVR dual solvers) train one weight
         * vector on prob as given. For all other solvers the instances are first grouped by
         * class; then either the Crammer-Singer multi-class solver is run, or one binary
         * problem is trained for two classes, or one binary problem per class
         * (that class against all others) for more than two classes.
         *
         * @param prob training data; within each instance the feature indices must be
         *        positive and strictly ascending
         * @param param solver type, C, eps and optional per-class weights
         * @return the trained model
         * @throws ArgumentNullException if prob or param is null; also thrown (type kept for
         *         backward compatibility) if prob has zero features or zero instances
         * @throws ArgumentException if the feature nodes of prob are not sorted in ascending
         *         order, or if a weight refers to a class label that does not occur in prob
         */
        public static Model train(Problem prob, Parameter param)
        {
            // The one-argument ArgumentNullException constructor takes the parameter NAME,
            // so the explanatory text has to go through the (paramName, message) overload.
            if (prob == null) throw new ArgumentNullException("prob", "problem must not be null");
            if (param == null) throw new ArgumentNullException("param", "parameter must not be null");

            // exception type left unchanged so that existing catch blocks keep working
            if (prob.n == 0) throw new ArgumentNullException("prob", "problem has zero features");
            if (prob.l == 0) throw new ArgumentNullException("prob", "problem has zero instances");

            foreach (Feature[] nodes in prob.x)
            {
                // starting at 0 also rejects indices below 1
                int indexBefore = 0;
                foreach (Feature n_ in nodes)
                {
                    if (n_.Index <= indexBefore)
                    {
                        throw new ArgumentException("feature nodes must be sorted by index in ascending order");
                    }
                    indexBefore = n_.Index;
                }
            }

            int l = prob.l;
            int n = prob.n;
            int w_size = prob.n;
            Model model = new Model();

            // with a bias term (bias >= 0) one of the n columns is the bias, not a feature
            if (prob.bias >= 0)
                model.nr_feature = n - 1;
            else
                model.nr_feature = n;

            model.solverType = param.solverType;
            model.bias = prob.bias;

            if (param.solverType.getId() == SolverType.L2R_L2LOSS_SVR || //
                param.solverType.getId() == SolverType.L2R_L1LOSS_SVR_DUAL || //
                param.solverType.getId() == SolverType.L2R_L2LOSS_SVR_DUAL)
            {
                // regression: a single weight vector, no class labels
                model.w = new double[w_size];
                model.nr_class = 2;
                model.label = null;

                checkProblemSize(n, model.nr_class);

                train_one(prob, param, model.w, 0, 0);
            }
            else
            {
                int[] perm = new int[l];

                // group training data of the same class
                GroupClassesReturn rv = groupClasses(prob, perm);
                int nr_class = rv.nr_class;
                int[] label = rv.label;
                int[] start = rv.start;
                int[] count = rv.count;

                checkProblemSize(n, nr_class);

                model.nr_class = nr_class;
                model.label = new int[nr_class];
                for (int i = 0; i < nr_class; i++)
                    model.label[i] = label[i];

                // calculate weighted C
                double[] weighted_C = new double[nr_class];
                for (int i = 0; i < nr_class; i++)
                    weighted_C[i] = param.C;
                for (int i = 0; i < param.getNumWeights(); i++)
                {
                    int j;
                    for (j = 0; j < nr_class; j++)
                        if (param.weightLabel[i] == label[j]) break;

                    if (j == nr_class) throw new ArgumentException("class label " + param.weightLabel[i] + " specified in weight is not found");
                    weighted_C[j] *= param.weight[i];
                }

                // constructing the subproblem: the instances in grouped order (perm);
                // the labels are filled in per training run below
                Problem sub_prob = new Problem();
                sub_prob.l = l;
                sub_prob.n = n;
                sub_prob.x = new Feature[sub_prob.l][];
                sub_prob.y = new double[sub_prob.l];

                for (int k = 0; k < sub_prob.l; k++)
                    sub_prob.x[k] = prob.x[perm[k]];

                // multi-class svm by Crammer and Singer
                if (param.solverType.getId() == SolverType.MCSVM_CS)
                {
                    model.w = new double[n * nr_class];
                    // the label of an instance is the position of its class in the grouping
                    for (int i = 0; i < nr_class; i++)
                    {
                        for (int j = start[i]; j < start[i] + count[i]; j++)
                        {
                            sub_prob.y[j] = i;
                        }
                    }

                    SolverMCSVM_CS solver = new SolverMCSVM_CS(sub_prob, nr_class, weighted_C, param.eps);
                    solver.solve(model.w);
                }
                else
                {
                    if (nr_class == 2)
                    {
                        model.w = new double[w_size];

                        // first class is +1, second class is -1
                        int e0 = start[0] + count[0];
                        int k = 0;
                        for (; k < e0; k++)
                            sub_prob.y[k] = +1;
                        for (; k < sub_prob.l; k++)
                            sub_prob.y[k] = -1;

                        train_one(sub_prob, param, model.w, weighted_C[0], weighted_C[1]);
                    }
                    else
                    {
                        model.w = new double[w_size * nr_class];
                        double[] w = new double[w_size];
                        for (int i = 0; i < nr_class; i++)
                        {
                            // class i (+1) against every other class (-1)
                            int si = start[i];
                            int ei = si + count[i];

                            int k = 0;
                            for (; k < si; k++)
                                sub_prob.y[k] = -1;
                            for (; k < ei; k++)
                                sub_prob.y[k] = +1;
                            for (; k < sub_prob.l; k++)
                                sub_prob.y[k] = -1;

                            train_one(sub_prob, param, w, weighted_C[i], param.C);

                            // weights are stored feature-major: entry j * nr_class + i
                            // is the weight of feature j for class i
                            for (int j = 0; j < n; j++)
                                model.w[j * nr_class + i] = w[j];
                        }
                    }
                }
            }
            return model;
        }