예제 #1
0
        /// <summary>
        /// Builds an SVM problem from <paramref name="trainSet"/>, trains a LibSvm model on it,
        /// stores the model's prediction on every item of <paramref name="testSet"/> and writes
        /// the training samples ("train.libsvm") and the predictions ("output.libsvm") to disk.
        /// </summary>
        /// <param name="trainSet">Items used to build the training features and labels.</param>
        /// <param name="testSet">Items whose <c>PredictedRating</c> is overwritten with the model output.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="trainSet"/> or <paramref name="testSet"/> is null.
        /// </exception>
        public void TrainAndTest(IEnumerable <ItemRating> trainSet, IEnumerable <ItemRating> testSet)
        {
            // Fail before the (potentially long) training run instead of after it.
            if (trainSet == null)
            {
                throw new ArgumentNullException("trainSet");
            }
            if (testSet == null)
            {
                throw new ArgumentNullException("testSet");
            }

            // Materialize once: X and Y must be built from the very same items in the very
            // same order, which a lazy or non-repeatable sequence does not guarantee when it
            // is enumerated twice.
            var trainItems = trainSet.ToList();

            var problem = new SvmProblem()
            {
                X = trainItems.Select(ir => FeatureBuilder.GetSvmNode(ir)).ToArray(),
                Y = trainItems.Select(ir => LabelSelector(ir)).ToArray()
            };

            Parameters.Check(problem);

            Console.WriteLine("Writing training samples...");
            WriteSvmFile(problem, "train.libsvm");

            Console.WriteLine("LibSvm training...");

            LibSvm.SvmModel model = Svm.Train(problem, Parameters);

            var predictedClasses = new List <float>();

            Console.WriteLine("LibSvm testing...");
            foreach (var ir in testSet)
            {
                ir.PredictedRating = (float)model.Predict(FeatureBuilder.GetSvmNode(ir));
                predictedClasses.Add(ir.PredictedRating);
            }

            Console.WriteLine("Writing output...");

            // The output file is machine-readable, so format numbers culture-independently
            // (a decimal comma from the current culture would corrupt it).
            File.WriteAllLines(
                "output.libsvm",
                predictedClasses.Select(i => i.ToString(System.Globalization.CultureInfo.InvariantCulture)));
        }
예제 #2
0
        //
        // Interface functions
        //
        /// <summary>
        /// Trains an SVM model for <paramref name="prob"/> using <paramref name="param"/>.
        /// </summary>
        /// <remarks>
        /// Two paths exist, selected by <c>param.SvmType.IsSVROrOneClass()</c>:
        /// <list type="bullet">
        /// <item>regression / one-class: a single decision function is trained on the whole
        /// problem and every training vector with a non-zero alpha is kept as a support vector;</item>
        /// <item>classification: the samples are grouped by class, one binary problem is trained
        /// for every pair of classes (<c>nr_class * (nr_class - 1) / 2</c> in total) and the
        /// results are merged into the model arrays.</item>
        /// </list>
        /// </remarks>
        /// <param name="prob">Training problem; its <c>X</c>, <c>Lenght</c> (sic) members are read here.</param>
        /// <param name="param">Training parameters; the same instance is stored in the returned model's <c>Param</c>.</param>
        /// <returns>The newly created and populated <c>SvmModel</c>.</returns>
        public static SvmModel Train(SvmProblem prob, SvmParameter param)
        {
            var model = new SvmModel();

            model.Param = param;

            if (param.SvmType.IsSVROrOneClass())
            {
                // regression or one-class-svm
                model.NrClass = 2;
                model.Label   = null;
                model.SupportVectorsNumbers = null;
                model.ProbA = null; model.ProbB = null;
                model.SupportVectorsCoefficients = new double[1][];

                // Only SVR (not one-class) gets a probability parameter, and only when requested.
                if (param.Probability && param.SvmType.IsSVR())
                {
                    model.ProbA    = new double[1];
                    model.ProbA[0] = svm_svr_probability(prob, param);
                }

                // NOTE(review): the two trailing arguments receive the per-class weighted C values
                // in the classification branch below; 0 is passed here — confirm they are unused
                // for these SVM types.
                DecisionFunction f = svm_train_one(prob, param, 0, 0);
                model.Rho    = new double[1];
                model.Rho[0] = f.Rho;

                // First pass: count the vectors with a non-zero alpha so the arrays can be sized.
                int nSV = 0;
                int i;
                for (i = 0; i < prob.Lenght; i++)
                {
                    if (Math.Abs(f.Alpha[i]) > 0)
                    {
                        ++nSV;
                    }
                }
                model.TotalSupportVectorsNumber     = nSV;
                model.SupportVectors                = new SvmNode[nSV][];
                model.SupportVectorsCoefficients[0] = new double[nSV];
                // Second pass: copy those vectors and their alphas, in training order.
                int j = 0;
                for (i = 0; i < prob.Lenght; i++)
                {
                    if (Math.Abs(f.Alpha[i]) > 0)
                    {
                        model.SupportVectors[j] = prob.X[i];
                        model.SupportVectorsCoefficients[0][j] = f.Alpha[i];
                        ++j;
                    }
                }
            }
            else
            {
                // classification
                int   l    = prob.Lenght;
                int[] perm = new int[l];

                int   nr_class;
                int[] label;
                int[] start;
                int[] count;

                // group training data of the same class
                svm_group_classes(prob, out nr_class, out label, out start, out count, perm);

                if (nr_class == 1)
                {
                    Svm.info("WARNING: training data in only one class. See README for details.\n");
                }

                // x is prob.X reordered through perm; the code below treats
                // x[start[c]] .. x[start[c] + count[c] - 1] as the samples of class c.
                SvmNode[][] x = new SvmNode[l][];
                int         i;
                for (i = 0; i < l; i++)
                {
                    x[i] = prob.X[perm[i]];
                }

                // calculate weighted C
                // Every class starts at param.C; each (WeightLabel, Weight) entry whose label
                // matches a class multiplies that class's C.

                double[] weighted_C = new double[nr_class];
                for (i = 0; i < nr_class; i++)
                {
                    weighted_C[i] = param.C;
                }
                for (i = 0; i < param.WeightsCount; i++)
                {
                    int j;
                    for (j = 0; j < nr_class; j++)
                    {
                        if (param.WeightLabel[i] == label[j])
                        {
                            break;
                        }
                    }
                    // j == nr_class means the search above found no class with this label.
                    if (j == nr_class)
                    {
                        System.Diagnostics.Debug.WriteLine("WARNING: class label " + param.WeightLabel[i] + " specified in weight is not found\n");
                    }
                    else
                    {
                        weighted_C[j] *= param.Weight[i];
                    }
                }

                // train k*(k-1)/2 models

                // nonzero[t] becomes true once x[t] has a non-zero alpha in at least one
                // pairwise classifier. (The explicit reset is redundant in C#: a new bool[]
                // is already all false.)
                var nonzero = new bool[l];
                for (i = 0; i < l; i++)
                {
                    nonzero[i] = false;
                }
                var f = new DecisionFunction[nr_class * (nr_class - 1) / 2];

                double[] probA = null, probB = null;
                if (param.Probability)
                {
                    probA = new double[nr_class * (nr_class - 1) / 2];
                    probB = new double[nr_class * (nr_class - 1) / 2];
                }

                // p indexes the pairwise classifier for (i, j), enumerated with i < j.
                int p = 0;
                for (i = 0; i < nr_class; i++)
                {
                    for (int j = i + 1; j < nr_class; j++)
                    {
                        int si = start[i], sj = start[j];
                        int ci = count[i], cj = count[j];
                        var subprobLenght = ci + cj;
                        var sub_prob      = new SvmProblem
                        {
                            X = new SvmNode[subprobLenght][],
                            Y = new double[subprobLenght]
                        };

                        // Binary sub-problem: class i samples first, labelled +1,
                        // followed by class j samples, labelled -1.
                        int k;
                        for (k = 0; k < ci; k++)
                        {
                            sub_prob.X[k] = x[si + k];
                            sub_prob.Y[k] = +1;
                        }
                        for (k = 0; k < cj; k++)
                        {
                            sub_prob.X[ci + k] = x[sj + k];
                            sub_prob.Y[ci + k] = -1;
                        }

                        // Probability parameters are estimated before the final training call
                        // for this pair.
                        if (param.Probability)
                        {
                            double[] probAB = new double[2];
                            svm_binary_svc_probability(sub_prob, param, weighted_C[i], weighted_C[j], probAB);
                            probA[p] = probAB[0];
                            probB[p] = probAB[1];
                        }

                        f[p] = svm_train_one(sub_prob, param, weighted_C[i], weighted_C[j]);
                        // Alpha is indexed like sub_prob: [0, ci) is class i, [ci, ci + cj) is class j.
                        for (k = 0; k < ci; k++)
                        {
                            if (!nonzero[si + k] && Math.Abs(f[p].Alpha[k]) > 0)
                            {
                                nonzero[si + k] = true;
                            }
                        }
                        for (k = 0; k < cj; k++)
                        {
                            if (!nonzero[sj + k] && Math.Abs(f[p].Alpha[ci + k]) > 0)
                            {
                                nonzero[sj + k] = true;
                            }
                        }
                        ++p;
                    }
                }

                // build output

                model.NrClass = nr_class;

                model.Label = new int[nr_class];
                for (i = 0; i < nr_class; i++)
                {
                    model.Label[i] = label[i];
                }

                model.Rho = new double[nr_class * (nr_class - 1) / 2];
                for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
                {
                    model.Rho[i] = f[i].Rho;
                }

                if (param.Probability)
                {
                    model.ProbA = new double[nr_class * (nr_class - 1) / 2];
                    model.ProbB = new double[nr_class * (nr_class - 1) / 2];
                    for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
                    {
                        model.ProbA[i] = probA[i];
                        model.ProbB[i] = probB[i];
                    }
                }
                else
                {
                    model.ProbA = null;
                    model.ProbB = null;
                }

                // Count the support vectors per class (nz_count) and in total (nnz).
                int   nnz      = 0;
                int[] nz_count = new int[nr_class];
                model.SupportVectorsNumbers = new int[nr_class];
                for (i = 0; i < nr_class; i++)
                {
                    int nSV = 0;
                    for (int j = 0; j < count[i]; j++)
                    {
                        if (nonzero[start[i] + j])
                        {
                            ++nSV;
                            ++nnz;
                        }
                    }
                    model.SupportVectorsNumbers[i] = nSV;
                    nz_count[i] = nSV;
                }

                Svm.info("Total nSV = " + nnz + "\n");

                // Support vectors are stored in the grouped (x) order.
                model.TotalSupportVectorsNumber = nnz;
                model.SupportVectors            = new SvmNode[nnz][];
                p = 0;
                for (i = 0; i < l; i++)
                {
                    if (nonzero[i])
                    {
                        model.SupportVectors[p++] = x[i];
                    }
                }

                // nz_start[c] is the running sum of nz_count over the classes before c, i.e. the
                // offset of class c's first support vector.
                int[] nz_start = new int[nr_class];
                nz_start[0] = 0;
                for (i = 1; i < nr_class; i++)
                {
                    nz_start[i] = nz_start[i - 1] + nz_count[i - 1];
                }

                model.SupportVectorsCoefficients = new double[nr_class - 1][];
                for (i = 0; i < nr_class - 1; i++)
                {
                    model.SupportVectorsCoefficients[i] = new double[nnz];
                }

                p = 0;
                for (i = 0; i < nr_class; i++)
                {
                    for (int j = i + 1; j < nr_class; j++)
                    {
                        // classifier (i,j): coefficients with
                        // i are in sv_coef[j-1][nz_start[i]...],
                        // j are in sv_coef[i][nz_start[j]...]
                        // (sv_coef is model.SupportVectorsCoefficients here)

                        int si = start[i];
                        int sj = start[j];
                        int ci = count[i];
                        int cj = count[j];

                        int q = nz_start[i];
                        int k;
                        for (k = 0; k < ci; k++)
                        {
                            if (nonzero[si + k])
                            {
                                model.SupportVectorsCoefficients[j - 1][q++] = f[p].Alpha[k];
                            }
                        }
                        q = nz_start[j];
                        for (k = 0; k < cj; k++)
                        {
                            if (nonzero[sj + k])
                            {
                                model.SupportVectorsCoefficients[i][q++] = f[p].Alpha[ci + k];
                            }
                        }
                        ++p;
                    }
                }
            }
            return(model);
        }
예제 #3
0
        /// <summary>
        /// Builds an <c>SvmModel</c> from the lines of a textual model description.
        /// </summary>
        /// <remarks>
        /// Lines are read as a keyword/value header until a line starting with <c>SV</c> is seen;
        /// after that each line holds <c>nr_class - 1</c> coefficients followed by
        /// <c>index:value</c> pairs, and at most <c>total_sv</c> such lines are consumed.
        /// The sizes of the <c>rho</c>, <c>label</c>, <c>probA</c>, <c>probB</c> and <c>nr_sv</c>
        /// arrays are derived from the <c>nr_class</c> value read so far. Blank or
        /// whitespace-only lines (and null entries) are skipped.
        /// </remarks>
        /// <param name="lines">The model text, one entry per line.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when a header line starts with an unknown keyword.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
        public static SvmModel LoadModel(IEnumerable <string> lines)
        {
            if (lines == null)
            {
                throw new ArgumentNullException("lines");
            }

            // read parameters
            var model = new SvmModel();
            var param = new SvmParameter();

            model.Param          = param;
            model.Rho            = null;
            model.ProbA          = null;
            model.ProbB          = null;
            model.Label          = null;
            model.SupportVectors = null;

            bool headerDone    = false;
            int  coefCount     = 0;   // coefficients per support-vector line (nr_class - 1)
            int  vectorCount   = 0;   // number of support-vector lines to read (total_sv)
            int  currentVector = 0;

            foreach (var cmd in lines)
            {
                if (cmd == null)
                {
                    continue;
                }

                // A null separator array splits on any whitespace.
                var splitted = cmd.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                if (splitted.Length == 0)
                {
                    // Blank line: there is no keyword or coefficient to index into.
                    continue;
                }

                if (!headerDone)
                {
                    switch (splitted[0])
                    {
                    case "svm_type":
                        param.SvmType = (SvmType)Enum.Parse(typeof(SvmType), splitted[1], ignoreCase: true);
                        break;

                    case "kernel_type":
                        param.KernelType = (KernelType)Enum.Parse(typeof(KernelType), splitted[1], ignoreCase: true);
                        break;

                    case "degree":
                        param.Degree = atoi(splitted[1]);
                        break;

                    case "gamma":
                        param.Gamma = atof(splitted[1]);
                        break;

                    case "coef0":
                        param.Coef0 = atof(splitted[1]);
                        break;

                    case "nr_class":
                        model.NrClass = atoi(splitted[1]);
                        break;

                    case "total_sv":
                        model.TotalSupportVectorsNumber = atoi(splitted[1]);
                        break;

                    case "rho":
                        model.Rho = ParseHeaderDoubles(splitted, model.NrClass * (model.NrClass - 1) / 2);
                        break;

                    case "label":
                        model.Label = ParseHeaderInts(splitted, model.NrClass);
                        break;

                    case "probA":
                        model.ProbA = ParseHeaderDoubles(splitted, model.NrClass * (model.NrClass - 1) / 2);
                        break;

                    case "probB":
                        model.ProbB = ParseHeaderDoubles(splitted, model.NrClass * (model.NrClass - 1) / 2);
                        break;

                    case "nr_sv":
                        model.SupportVectorsNumbers = ParseHeaderInts(splitted, model.NrClass);
                        break;

                    case "SV":
                        // End of header: size the coefficient and vector tables from what was read.
                        headerDone  = true;
                        coefCount   = model.NrClass - 1;
                        vectorCount = model.TotalSupportVectorsNumber;
                        model.SupportVectorsCoefficients = new double[coefCount][];
                        for (int i = 0; i < coefCount; i++)
                        {
                            model.SupportVectorsCoefficients[i] = new double[vectorCount];
                        }
                        model.SupportVectors = new SvmNode[vectorCount][];
                        break;

                    default:
                        System.Diagnostics.Debug.WriteLine("unknown text in model file: [" + cmd + "]");
                        return(null);
                    }
                }
                else if (currentVector < vectorCount)
                {
                    // read sv_coef and SV
                    for (int k = 0; k < coefCount; k++)
                    {
                        model.SupportVectorsCoefficients[k][currentVector] = atof(splitted[k]);
                    }

                    // Everything after the coefficients is an index:value pair.
                    int nodeCount = splitted.Length - coefCount;
                    model.SupportVectors[currentVector] = new SvmNode[nodeCount];
                    for (int j = 0; j < nodeCount; j++)
                    {
                        var pair = splitted[coefCount + j].Split(new char[] { ':' }, 2);
                        model.SupportVectors[currentVector][j] = new SvmNode(atoi(pair[0]), atof(pair[1]));
                    }
                    currentVector++;
                }
            }
            return(model);
        }

        // Parses up to `count` floating-point values that follow the keyword in tokens[0];
        // slots for which the line has no token keep their default of 0.
        private static double[] ParseHeaderDoubles(string[] tokens, int count)
        {
            var values = new double[count];
            for (int i = 0; i < count && i + 1 < tokens.Length; i++)
            {
                values[i] = atof(tokens[i + 1]);
            }
            return values;
        }

        // Parses up to `count` integer values that follow the keyword in tokens[0];
        // slots for which the line has no token keep their default of 0.
        private static int[] ParseHeaderInts(string[] tokens, int count)
        {
            var values = new int[count];
            for (int i = 0; i < count && i + 1 < tokens.Length; i++)
            {
                values[i] = atoi(tokens[i + 1]);
            }
            return values;
        }
예제 #4
0
 /// <summary>
 /// Initializes a <c>SaveModelEnumerator</c> by storing <paramref name="model"/> in the
 /// <c>model</c> field. Nothing is read from the model here.
 /// </summary>
 /// <param name="model">The model reference to keep; it is not validated (null is stored as-is).</param>
 internal SaveModelEnumerator(SvmModel model)
 {
     this.model = model;
 }
예제 #5
0
 /// <summary>
 /// Wraps <paramref name="model"/> in a new <c>SaveModelEnumerator</c> and hands it back
 /// as a sequence of strings.
 /// </summary>
 /// <param name="model">The model passed to the <c>SaveModelEnumerator</c> constructor.</param>
 /// <returns>The newly created <c>SaveModelEnumerator</c>, typed as <c>IEnumerable&lt;string&gt;</c>.</returns>
 public static IEnumerable <string> SaveModel(SvmModel model)
 {
     IEnumerable <string> serializedLines = new SaveModelEnumerator(model);
     return serializedLines;
 }