Пример #1
0
 /// <summary>
 /// Validates the parameters against the problem and, when they are accepted,
 /// runs cross validation over the training data.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to evaluate</param>
 /// <param name="nrfold">The number of folds to use for cross validation</param>
 /// <returns>The score produced by the cross validation run</returns>
 /// <exception cref="Exception">Thrown with the checker's message when the parameter check reports an error</exception>
 public static double PerformCrossValidation(Problem problem, Parameter parameters, int nrfold)
 {
     string validationMessage = Procedures.svm_check_parameter(problem, parameters);
     if (validationMessage != null)
     {
         throw new Exception(validationMessage);
     }

     return doCrossValidation(problem, parameters, nrfold);
 }
Пример #2
0
 /// <summary>
 /// Builds a new problem whose feature values are the input values passed through the
 /// supplied range transform.  The input problem is left untouched; labels are copied as-is.
 /// </summary>
 /// <param name="range">The Range transform to use in scaling</param>
 /// <param name="prob">The problem to scale</param>
 /// <returns>The Scaled problem</returns>
 public static Problem Scale(this IRangeTransform range, Problem prob)
 {
     Problem result = new Problem(prob.Count, new double[prob.Count], new Node[prob.Count][], prob.MaxIndex);
     for (int row = 0; row < result.Count; row++)
     {
         Node[] source = prob.X[row];
         Node[] scaled = new Node[source.Length];
         result.X[row] = scaled;
         for (int col = 0; col < scaled.Length; col++)
         {
             // Each node keeps its feature index; only the value is transformed.
             double transformed = range.Transform(source[col].Value, source[col].Index);
             scaled[col] = new Node(source[col].Index, transformed);
         }
         result.Y[row] = prob.Y[row];
     }
     return result;
 }
        /// <summary>
        /// Computes a per-feature mean and sample standard deviation over the nodes present
        /// in the problem and wraps them in a Gaussian transform.
        /// </summary>
        /// <param name="prob">The Problem to analyze</param>
        /// <returns>The Gaussian transform for the problem</returns>
        public static GaussianTransform Compute(Problem prob)
        {
            int featureCount = prob.MaxIndex;
            int[] counts = new int[featureCount];
            double[] means = new double[featureCount];

            // First pass: sum the values and count the occurrences of every feature.
            // Node indices are 1-based, array slots are 0-based.
            foreach (Node[] sample in prob.X)
            {
                foreach (Node node in sample)
                {
                    int slot = node.Index - 1;
                    means[slot] += node.Value;
                    counts[slot]++;
                }
            }
            for (int f = 0; f < featureCount; f++)
            {
                // A feature that never occurs gets a count of 2, which keeps the
                // division below (and the later "count - 1" divisor) away from zero.
                if (counts[f] == 0)
                {
                    counts[f] = 2;
                }
                means[f] /= counts[f];
            }

            // Second pass: accumulate squared deviations from the mean.
            double[] stddevs = new double[featureCount];
            foreach (Node[] sample in prob.X)
            {
                foreach (Node node in sample)
                {
                    int slot = node.Index - 1;
                    double delta = node.Value - means[slot];
                    stddevs[slot] += delta * delta;
                }
            }
            for (int f = 0; f < featureCount; f++)
            {
                // An exactly-zero sum is left as zero; everything else becomes sqrt(sum / (n - 1)).
                if (stddevs[f] != 0)
                {
                    stddevs[f] = Math.Sqrt(stddevs[f] / (counts[f] - 1));
                }
            }

            return new GaussianTransform(means, stddevs);
        }
 /// <summary>
 /// Cross-validates every (C, Gamma) combination of the two candidate lists that has not
 /// been evaluated yet, and updates the best values found so far.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use; its C and Gamma are overwritten for each evaluated pair</param>
 /// <param name="CValues">The candidate C values</param>
 /// <param name="GammaValues">The candidate Gamma values</param>
 /// <param name="output">Optional writer that receives one "C Gamma score" line per evaluated pair; may be null</param>
 /// <param name="nrfold">The number of folds to use for cross validation</param>
 /// <param name="C">Receives the C of the best pair when a better score is found</param>
 /// <param name="Gamma">Receives the Gamma of the best pair when a better score is found</param>
 /// <param name="crossValidation">The best score so far; updated when a better score is found</param>
 private void DoSearch(
     Problem problem,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     StreamWriter output,
     int nrfold,
     ref double C,
     ref double Gamma,
     ref double crossValidation)
 {
     for (int i = 0; i < CValues.Count; i++)
     {
         for (int j = 0; j < GammaValues.Count; j++)
         {
             double candidateC = CValues[i];
             double candidateGamma = GammaValues[j];

             // _Cs and _Gammas are kept as parallel lists: entry k holds the k-th evaluated
             // (C, Gamma) pair.  Checking the two values independently would skip pairs that
             // were never evaluated as soon as each value had been seen in some other pair.
             // NOTE(review): assumes no other code relies on _Cs holding one entry per C row - confirm.
             bool alreadyEvaluated = false;
             for (int k = 0; k < _Cs.Count && k < _Gammas.Count; k++)
             {
                 if (_Cs[k] == candidateC && _Gammas[k] == candidateGamma)
                 {
                     alreadyEvaluated = true;
                     break;
                 }
             }

             if (!alreadyEvaluated)
             {
                 parameters.C = candidateC;
                 parameters.Gamma = candidateGamma;
                 double test = Training.PerformCrossValidation(problem, parameters, nrfold);
                 Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                 if (output != null)
                     output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                 if (test > crossValidation)
                 {
                     C = parameters.C;
                     Gamma = parameters.Gamma;
                     crossValidation = test;
                     Console.WriteLine(" New Maximum!");
                 }
                 else Console.WriteLine();

                 // Record the pair only after it has been evaluated successfully.
                 _Cs.Add(candidateC);
                 _Gammas.Add(candidateGamma);
             }

             // Progress is advanced for skipped pairs too, so the iteration count
             // always moves by one per grid cell.
             _crossIterations++;
             OnEndEpoch();
         }
     }
 }
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  After the coarse pass a second, finer pass is run around the best
 /// pair found.  Use this method if validation data isn't available, as it will
 /// divide the training data and train on a portion of it and test on the rest.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The set of C values to use</param>
 /// <param name="GammaValues">The set of Gamma values to use</param>
 /// <param name="outputFile">Output file for the parameter results; may be null to write no file.</param>
 /// <param name="nrfold">The number of times the data should be divided for validation</param>
 /// <param name="C">The optimal C value will be placed in this variable (0 if no pair was selected)</param>
 /// <param name="Gamma">The optimal Gamma value will be placed in this variable (0 if no pair was selected)</param>
 public void Grid(
     Problem problem,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     int nrfold,
     out double C,
     out double Gamma)
 {
     C = 0;
     Gamma = 0;
     double crossValidation = double.MinValue;
     StreamWriter output = null;
     if (outputFile != null)
         output = new StreamWriter(outputFile);
     try
     {
         _Cs = new List<double>();
         _Gammas = new List<double>();
         // Total work: the coarse grid plus a (2 * width + 1) x (2 * width + 1) fine grid.
         _cycles = CValues.Count * GammaValues.Count + (int)Math.Pow(FINE_STEP_WIDTH * 2 + 1, 2);
         _crossIterations = 0;
         DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);

         // Math.Log is only finite for positive input, so the fine ranges can only be
         // built when the coarse pass actually selected a positive C and Gamma.
         if (C > 0 && Gamma > 0)
         {
             // Recompute the candidate values for the fine search around the best pair.
             double dblCurrentCPow = Math.Log(C, 2);
             double dblCurrentGammaPow = Math.Log(Gamma, 2);
             CValues = GetList(dblCurrentCPow - C_FINE_STEP * FINE_STEP_WIDTH, dblCurrentCPow + C_FINE_STEP * FINE_STEP_WIDTH, C_FINE_STEP);
             GammaValues = GetList(dblCurrentGammaPow - G_FINE_STEP * FINE_STEP_WIDTH, dblCurrentGammaPow + G_FINE_STEP * FINE_STEP_WIDTH, G_FINE_STEP);
             DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);
         }
     }
     finally
     {
         // Close the results file even when a search step throws.
         if (output != null)
             output.Close();
     }
 }
 /// <summary>
 /// Grid parameter selection over the supplied C and Gamma candidates using cross validation
 /// with the default fold count (NFOLD).  Delegates to the overload that takes an explicit
 /// fold count.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The set of C values to use</param>
 /// <param name="GammaValues">The set of Gamma values to use</param>
 /// <param name="outputFile">Output file for the parameter results.</param>
 /// <param name="C">The optimal C value will be put into this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
 public void Grid(
     Problem problem,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     out double C,
     out double Gamma)
 {
     this.Grid(
         problem,
         parameters,
         CValues,
         GammaValues,
         outputFile,
         NFOLD,
         out C,
         out Gamma);
 }
 /// <summary>
 /// Grid parameter selection using the default C and Gamma ranges (built from MIN_C/MAX_C/C_STEP
 /// and MIN_G/MAX_G/G_STEP) and the default fold count (NFOLD).  Delegates to the overload that
 /// takes explicit candidate lists and a fold count.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="outputFile">Output file for the parameter results.</param>
 /// <param name="C">The optimal C value will be put into this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
 public void Grid(
     Problem problem,
     Parameter parameters,
     string outputFile,
     out double C,
     out double Gamma)
 {
     List<double> defaultCValues = GetList(MIN_C, MAX_C, C_STEP);
     List<double> defaultGammaValues = GetList(MIN_G, MAX_G, G_STEP);
     Grid(problem, parameters, defaultCValues, defaultGammaValues, outputFile, NFOLD, out C, out Gamma);
 }
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  Each combination is trained on the training data and scored
 /// against the validation data.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="validation">The validation data</param>
 /// <param name="parameters">The parameters to use when optimizing; its C and Gamma are overwritten for each combination</param>
 /// <param name="CValues">The C values to use</param>
 /// <param name="GammaValues">The Gamma values to use</param>
 /// <param name="outputFile">The output file for the parameter results; may be null to write no file</param>
 /// <param name="C">The optimal C value will be placed in this variable (0 if no combination was selected)</param>
 /// <param name="Gamma">The optimal Gamma value will be placed in this variable (0 if no combination was selected)</param>
 public static void Grid(
     Problem problem,
     Problem validation,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     out double C,
     out double Gamma)
 {
     C = 0;
     Gamma = 0;
     double maxScore = double.MinValue;
     StreamWriter output = null;
     if (outputFile != null)
         output = new StreamWriter(outputFile);
     try
     {
         for (int i = 0; i < CValues.Count; i++)
             for (int j = 0; j < GammaValues.Count; j++)
             {
                 parameters.C = CValues[i];
                 parameters.Gamma = GammaValues[j];
                 Model model = Training.Train(problem, parameters);
                 // The per-sample predictions are written to "tmp.txt"; only the returned score is used here.
                 double test = Prediction.Predict(validation, "tmp.txt", model, false);
                 Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                 if (output != null)
                     output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                 if (test > maxScore)
                 {
                     C = parameters.C;
                     Gamma = parameters.Gamma;
                     maxScore = test;
                     Console.WriteLine(" New Maximum!");
                 }
                 else Console.WriteLine();
             }
     }
     finally
     {
         // Close the results file even when training or prediction throws.
         if (output != null)
             output.Close();
     }
 }
Пример #9
0
        /// <summary>
        /// Validates the parameters against the problem and, when they are accepted,
        /// trains a model on the training data.
        /// </summary>
        /// <param name="problem">The training data</param>
        /// <param name="parameters">The parameters to use</param>
        /// <returns>A trained SVM Model</returns>
        /// <exception cref="Exception">Thrown with the checker's message when the parameter check reports an error</exception>
        public static Model Train(Problem problem, Parameter parameters)
        {
            string validationMessage = Procedures.svm_check_parameter(problem, parameters);
            if (validationMessage != null)
            {
                throw new Exception(validationMessage);
            }

            return Procedures.svm_train(problem, parameters);
        }
Пример #10
0
        /// <summary>
        /// Parses the command line: a run of "-x value" options, followed by the input file name
        /// and an optional model file name.
        /// </summary>
        /// <param name="args">The raw command-line arguments</param>
        /// <param name="parameters">Receives a new Parameter with the option values applied</param>
        /// <param name="problem">Receives the problem read from the input file</param>
        /// <param name="crossValidation">Set to true when the -v option is present</param>
        /// <param name="nrfold">Receives the fold count given with -v (0 when -v is absent)</param>
        /// <param name="modelFilename">Receives the explicit model file name, or the input file's name plus ".model"</param>
        /// <exception cref="ArgumentException">
        /// Thrown for an unknown option, an option without a value, a fold count below 2,
        /// or a missing input file.
        /// </exception>
        private static void parseCommandLine(string[] args, out Parameter parameters, out Problem problem, out bool crossValidation, out int nrfold, out string modelFilename)
        {
            int i;

            parameters = new Parameter();
            // default values

            crossValidation = false;
            nrfold = 0;

            // parse options
            for (i = 0; i < args.Length; i++)
            {
                string option = args[i];

                // The first argument that does not start with '-' is the input file.
                if (string.IsNullOrEmpty(option) || option[0] != '-')
                    break;

                // A bare "-" carries no option letter.
                if (option.Length < 2)
                    throw new ArgumentException("Unknown Parameter");

                // Every option consumes the following argument as its value.
                if (++i >= args.Length)
                    throw new ArgumentException("Missing value for option " + option);
                string value = args[i];

                switch (option[1])
                {

                    case 's':
                        parameters.SvmType = (SvmType)int.Parse(value);
                        break;

                    case 't':
                        parameters.KernelType = (KernelType)int.Parse(value);
                        break;

                    case 'd':
                        parameters.Degree = int.Parse(value);
                        break;

                    case 'g':
                        parameters.Gamma = double.Parse(value);
                        break;

                    case 'r':
                        parameters.Coefficient0 = double.Parse(value);
                        break;

                    case 'n':
                        parameters.Nu = double.Parse(value);
                        break;

                    case 'm':
                        parameters.CacheSize = double.Parse(value);
                        break;

                    case 'c':
                        parameters.C = double.Parse(value);
                        break;

                    case 'e':
                        parameters.EPS = double.Parse(value);
                        break;

                    case 'p':
                        parameters.P = double.Parse(value);
                        break;

                    case 'h':
                        parameters.Shrinking = int.Parse(value) == 1;
                        break;

                    case 'b':
                        parameters.Probability = int.Parse(value) == 1;
                        break;

                    case 'v':
                        crossValidation = true;
                        nrfold = int.Parse(value);
                        if (nrfold < 2)
                        {
                            throw new ArgumentException("n-fold cross validation: n must >= 2");
                        }
                        break;

                    case 'w':
                        // "-wN value": N (the text after "-w") is the key, and the weight is this
                        // option's own value rather than whatever happens to sit at args[1].
                        parameters.Weights[int.Parse(option.Substring(2))] = double.Parse(value);
                        break;

                    default:
                        throw new ArgumentException("Unknown Parameter");
                }
            }

            // determine filenames

            if (i >= args.Length)
                throw new ArgumentException("No input file specified");

            problem = Problem.Read(args[i]);

            // A Gamma of 0 means "not specified": default to 1 / number of features.
            if (parameters.Gamma == 0)
                parameters.Gamma = 1.0 / problem.MaxIndex;

            if (i < args.Length - 1)
                modelFilename = args[i + 1];
            else
            {
                // Strip any leading directory part (up to the last '/') from the input file name.
                int p = args[i].LastIndexOf('/') + 1;
                modelFilename = args[i].Substring(p) + ".model";
            }
        }
Пример #11
0
 /// <summary>
 /// Runs cross validation and reduces the per-sample predictions to a single score.
 /// For the two regression SVM types the score is the correlation between predictions and
 /// labels; for every other type it is the fraction of predictions equal to the label.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to evaluate</param>
 /// <param name="nr_fold">The number of folds to use</param>
 /// <returns>The cross validation score</returns>
 private static double doCrossValidation(Problem problem, Parameter parameters, int nr_fold)
 {
     double[] predicted = new double[problem.Count];
     Procedures.svm_cross_validation(problem, parameters, nr_fold, predicted);

     bool isRegression = parameters.SvmType == SvmType.EPSILON_SVR || parameters.SvmType == SvmType.NU_SVR;
     if (!isRegression)
     {
         // Classification: count exact matches between prediction and label.
         int hits = 0;
         for (int k = 0; k < problem.Count; k++)
         {
             if (predicted[k] == problem.Y[k])
             {
                 hits++;
             }
         }
         return (double)hits / problem.Count;
     }

     // Regression: accumulate the sums needed for the correlation coefficient.
     double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
     for (int k = 0; k < problem.Count; k++)
     {
         double y = problem.Y[k];
         double v = predicted[k];
         sumv += v;
         sumy += y;
         sumvv += v * v;
         sumyy += y * y;
         sumvy += v * y;
     }

     double numerator = problem.Count * sumvy - sumv * sumy;
     double denominator = Math.Sqrt(problem.Count * sumvv - sumv * sumv) * Math.Sqrt(problem.Count * sumyy - sumy * sumy);
     return numerator / denominator;
 }
Пример #12
0
 /// <summary>
 /// Builds the Range transform for the given problem, scaling into the default
 /// lower and upper bounds.
 /// </summary>
 /// <param name="prob">The Problem to analyze</param>
 /// <returns>The Range transform for the problem</returns>
 public static RangeTransform Compute(Problem prob)
 {
     RangeTransform transform = Compute(prob, DEFAULT_LOWER_BOUND, DEFAULT_UPPER_BOUND);
     return transform;
 }
Пример #13
0
 /// <summary>
 /// Scans the problem for the minimum and maximum value of every feature and builds a
 /// Range transform that maps them onto the requested bounds.
 /// </summary>
 /// <param name="prob">The Problem to analyze</param>
 /// <param name="lowerBound">The lower bound for scaling</param>
 /// <param name="upperBound">The upper bound for scaling</param>
 /// <returns>The Range transform for the problem</returns>
 public static RangeTransform Compute(Problem prob, double lowerBound, double upperBound)
 {
     double[] minVals = new double[prob.MaxIndex];
     double[] maxVals = new double[prob.MaxIndex];

     // Start from sentinels so that the first observed value replaces them.
     for (int f = 0; f < prob.MaxIndex; f++)
     {
         minVals[f] = double.MaxValue;
         maxVals[f] = double.MinValue;
     }

     for (int row = 0; row < prob.Count; row++)
     {
         Node[] sample = prob.X[row];
         for (int col = 0; col < sample.Length; col++)
         {
             // Node indices are 1-based, array slots are 0-based.
             int slot = sample[col].Index - 1;
             double observed = sample[col].Value;
             minVals[slot] = Math.Min(minVals[slot], observed);
             maxVals[slot] = Math.Max(maxVals[slot], observed);
         }
     }

     // A feature still holding a sentinel gets a [0, 0] range.
     for (int f = 0; f < prob.MaxIndex; f++)
     {
         bool stillSentinel = minVals[f] == double.MaxValue || maxVals[f] == double.MinValue;
         if (stillSentinel)
         {
             minVals[f] = 0;
             maxVals[f] = 0;
         }
     }

     return new RangeTransform(minVals, maxVals, lowerBound, upperBound);
 }
Пример #14
0
        /// <summary>
        /// Writes a problem to a stream, one sample per line: the label followed by
        /// space-separated "index:value" pairs.  The stream is flushed but left open.
        /// </summary>
        /// <param name="stream">The stream to write the problem to.</param>
        /// <param name="problem">The problem to write.</param>
        public static void Write(Stream stream, Problem problem)
        {
            // NOTE(review): TemporaryCulture presumably switches the culture used for number
            // formatting while writing - confirm against its definition.
            TemporaryCulture.Start();
            try
            {
                // The writer is deliberately not disposed: disposing a StreamWriter closes the
                // underlying stream, which belongs to the caller.
                StreamWriter output = new StreamWriter(stream);
                for (int i = 0; i < problem.Count; i++)
                {
                    output.Write(problem.Y[i]);
                    for (int j = 0; j < problem.X[i].Length; j++)
                        output.Write(" {0}:{1}", problem.X[i][j].Index, problem.X[i][j].Value);
                    output.WriteLine();
                }
                output.Flush();
            }
            finally
            {
                // Always undo TemporaryCulture.Start(), even when writing throws.
                TemporaryCulture.Stop();
            }
        }
Пример #15
0
        /// <summary>
        /// Predicts the class memberships of all the vectors in the problem and reports
        /// how many predictions equal the stored label.
        /// </summary>
        /// <param name="problem">The SVM Problem to solve</param>
        /// <param name="outputFile">File for result output; may be null to write no file</param>
        /// <param name="model">The Model to use</param>
        /// <param name="predict_probability">Whether to output a distribution over the classes</param>
        /// <returns>
        /// The fraction (not a percentage) of vectors whose prediction equals the label;
        /// NaN when the problem contains no vectors.
        /// </returns>
        public static double Predict(
            Problem problem,
            string outputFile,
            Model model,
            bool predict_probability)
        {
            int correct = 0;
            int total = 0;
            StreamWriter output = outputFile != null ? new StreamWriter(outputFile) : null;

            try
            {
                SvmType svm_type = Procedures.svm_get_svm_type(model);
                int nr_class = Procedures.svm_get_nr_class(model);
                int[] labels = new int[nr_class];
                double[] prob_estimates = null;
                bool isRegression = svm_type == SvmType.EPSILON_SVR || svm_type == SvmType.NU_SVR;

                if (predict_probability && isRegression)
                {
                    Console.WriteLine("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=" + Procedures.svm_get_svr_probability(model));
                }
                else
                {
                    // Every other case starts the output with a header line listing the labels.
                    Procedures.svm_get_labels(model, labels);
                    if (predict_probability)
                        prob_estimates = new double[nr_class];
                    if (output != null)
                    {
                        output.Write("labels");
                        for (int j = 0; j < nr_class; j++)
                        {
                            output.Write(" " + labels[j]);
                        }
                        output.Write("\n");
                    }
                }

                // Probability estimates are only requested for the two classification types.
                bool useProbabilities = predict_probability && (svm_type == SvmType.C_SVC || svm_type == SvmType.NU_SVC);

                for (int i = 0; i < problem.Count; i++)
                {
                    double target = problem.Y[i];
                    Node[] x = problem.X[i];

                    double v;
                    if (useProbabilities)
                    {
                        v = Procedures.svm_predict_probability(model, x, prob_estimates);
                        if (output != null)
                        {
                            output.Write(target + " " + v + " ");
                            for (int j = 0; j < nr_class; j++)
                            {
                                output.Write(prob_estimates[j] + " ");
                            }
                            output.Write("\n");
                        }
                    }
                    else
                    {
                        v = Procedures.svm_predict(model, x);
                        if (output != null)
                            output.Write(target + " " + v + "\n");
                    }

                    if (v == target)
                        ++correct;
                    ++total;
                }
            }
            finally
            {
                // Close the output file even when prediction throws.
                if (output != null)
                    output.Close();
            }

            return (double)correct / total;
        }
Пример #16
0
 /// <summary>
 /// Creates (or overwrites) the named file and writes the problem into it.
 /// The file is closed again whether or not writing succeeds.
 /// </summary>
 /// <param name="filename">The file to write to</param>
 /// <param name="problem">The problem to write</param>
 public static void Write(string filename, Problem problem)
 {
     using (FileStream target = File.Open(filename, FileMode.Create))
     {
         Write(target, problem);
     }
 }
Пример #17
0
 /// <summary>
 /// Convenience overload: computes the Range transform for the problem using
 /// DEFAULT_LOWER_BOUND and DEFAULT_UPPER_BOUND as the scaling bounds.
 /// </summary>
 /// <param name="prob">The Problem to analyze</param>
 /// <returns>The Range transform for the problem</returns>
 public static RangeTransform Compute(Problem prob)
 {
     RangeTransform defaultRange = Compute(prob, DEFAULT_LOWER_BOUND, DEFAULT_UPPER_BOUND);
     return defaultRange;
 }