/// <summary>
/// Validates the parameters against the problem and, when they are acceptable,
/// runs cross validation over the training data.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to evaluate</param>
/// <param name="nrfold">The number of folds to split the data into</param>
/// <returns>The cross validation score</returns>
/// <exception cref="Exception">
/// Thrown with the message produced by the parameter check when it reports an error.
/// </exception>
public static double PerformCrossValidation(Problem problem, Parameter parameters, int nrfold)
{
    string validationMessage = Procedures.svm_check_parameter(problem, parameters);
    if (validationMessage != null)
    {
        throw new Exception(validationMessage);
    }

    return doCrossValidation(problem, parameters, nrfold);
}
/// <summary>
/// Produces a scaled copy of a problem by passing every feature value through
/// the supplied range transform. Labels, node indices and the maximum index are
/// copied as they are; the input problem is only read.
/// </summary>
/// <param name="range">The Range transform to use in scaling</param>
/// <param name="prob">The problem to scale</param>
/// <returns>The Scaled problem</returns>
public static Problem Scale(this IRangeTransform range, Problem prob)
{
    int sampleCount = prob.Count;
    Problem result = new Problem(sampleCount, new double[sampleCount], new Node[sampleCount][], prob.MaxIndex);

    for (int row = 0; row < result.Count; row++)
    {
        Node[] sourceRow = prob.X[row];
        Node[] scaledRow = new Node[sourceRow.Length];

        for (int col = 0; col < scaledRow.Length; col++)
        {
            Node original = sourceRow[col];
            scaledRow[col] = new Node(original.Index, range.Transform(original.Value, original.Index));
        }

        result.X[row] = scaledRow;
        result.Y[row] = prob.Y[row];
    }

    return result;
}
/// <summary>
/// Computes the per-feature mean and standard deviation of a problem and wraps
/// them in a Gaussian transform.
/// </summary>
/// <param name="prob">The Problem to analyze</param>
/// <returns>The Gaussian transform for the problem</returns>
public static GaussianTransform Compute(Problem prob)
{
    int[] occurrences = new int[prob.MaxIndex];
    double[] means = new double[prob.MaxIndex];

    // First pass: sum the values and count the occurrences of each feature.
    // Node indices are 1-based, the arrays are 0-based.
    foreach (Node[] sample in prob.X)
    {
        foreach (Node node in sample)
        {
            int slot = node.Index - 1;
            means[slot] += node.Value;
            occurrences[slot]++;
        }
    }

    for (int feature = 0; feature < prob.MaxIndex; feature++)
    {
        // A feature that never occurs gets a substitute count of 2, which keeps
        // the division defined (its sum is 0, so its mean stays 0).
        if (occurrences[feature] == 0)
        {
            occurrences[feature] = 2;
        }

        means[feature] /= occurrences[feature];
    }

    // Second pass: accumulate squared deviations from the mean.
    double[] stddevs = new double[prob.MaxIndex];
    foreach (Node[] sample in prob.X)
    {
        foreach (Node node in sample)
        {
            int slot = node.Index - 1;
            double deviation = node.Value - means[slot];
            stddevs[slot] += deviation * deviation;
        }
    }

    for (int feature = 0; feature < prob.MaxIndex; feature++)
    {
        // Features with no spread keep a standard deviation of exactly 0.
        if (stddevs[feature] != 0)
        {
            stddevs[feature] /= (occurrences[feature] - 1);
            stddevs[feature] = Math.Sqrt(stddevs[feature]);
        }
    }

    return new GaussianTransform(means, stddevs);
}
/// <summary>
/// Runs one grid pass: cross-validates every (C, Gamma) pair from the two lists,
/// reports each score to the console (and to <paramref name="output"/> when given)
/// and updates the best values found so far.
/// </summary>
/// <remarks>
/// A pair is skipped only when both its C and its Gamma were already recorded
/// before this pass started. The check uses a snapshot taken on entry: testing
/// against the live lists would also skip pairs whose Gamma was merely added
/// earlier in the same pass, so combinations of an already-known C with a new
/// Gamma would never be evaluated.
/// </remarks>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use; C and Gamma are overwritten for each pair</param>
/// <param name="CValues">The C values to try</param>
/// <param name="GammaValues">The Gamma values to try</param>
/// <param name="output">Optional writer receiving one "C Gamma score" line per evaluated pair; may be null</param>
/// <param name="nrfold">The number of folds used for cross validation</param>
/// <param name="C">Best C found so far; updated when a better score is seen</param>
/// <param name="Gamma">Best Gamma found so far; updated when a better score is seen</param>
/// <param name="crossValidation">Best score found so far; updated when a better score is seen</param>
private void DoSearch(
    Problem problem,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    StreamWriter output,
    int nrfold,
    ref double C,
    ref double Gamma,
    ref double crossValidation)
{
    // Values recorded by earlier passes only; see remarks.
    List<double> previousCs = new List<double>(_Cs);
    List<double> previousGammas = new List<double>(_Gammas);

    for (int i = 0; i < CValues.Count; i++)
    {
        for (int j = 0; j < GammaValues.Count; j++)
        {
            bool alreadyEvaluated = previousCs.Contains(CValues[i]) && previousGammas.Contains(GammaValues[j]);
            if (!alreadyEvaluated)
            {
                parameters.C = CValues[i];
                parameters.Gamma = GammaValues[j];
                double test = Training.PerformCrossValidation(problem, parameters, nrfold);

                Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                if (output != null)
                {
                    output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                }

                if (test > crossValidation)
                {
                    C = parameters.C;
                    Gamma = parameters.Gamma;
                    crossValidation = test;
                    Console.WriteLine(" New Maximum!");
                }
                else
                {
                    Console.WriteLine();
                }

                _Gammas.Add(GammaValues[j]);
            }

            // Progress is reported for every pair, evaluated or skipped.
            _crossIterations++;
            OnEndEpoch();
        }

        _Cs.Add(CValues[i]);
    }
}
/// <summary>
/// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
/// combination which performed best. A second, finer search is then run around the best combination
/// found. Use this method if validation data isn't available, as it relies on cross validation of the
/// training data.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use when optimizing</param>
/// <param name="CValues">The set of C values to use</param>
/// <param name="GammaValues">The set of Gamma values to use</param>
/// <param name="outputFile">Output file for the parameter results; may be null to skip file output.</param>
/// <param name="nrfold">The number of times the data should be divided for validation</param>
/// <param name="C">The optimal C value will be placed in this variable</param>
/// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
public void Grid(
    Problem problem,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    string outputFile,
    int nrfold,
    out double C,
    out double Gamma)
{
    C = 0;
    Gamma = 0;
    double crossValidation = double.MinValue;

    StreamWriter output = null;
    if (outputFile != null)
    {
        output = new StreamWriter(outputFile);
    }

    // try/finally so the results file is closed even when a search pass throws.
    try
    {
        _Cs = new List<double>();
        _Gammas = new List<double>();

        // Total number of pairs visited: the coarse grid plus the fine grid.
        _cycles = CValues.Count * GammaValues.Count + (int)Math.Pow(FINE_STEP_WIDTH * 2 + 1, 2);
        _crossIterations = 0;

        // Coarse pass over the caller-supplied values.
        DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);

        // Recompute the value ranges for the fine search, centred on the
        // base-2 exponents of the best coarse C and Gamma.
        double dblCurrentCPow = Math.Log(C, 2);
        double dblCurrentGammaPow = Math.Log(Gamma, 2);

        CValues = GetList(
            dblCurrentCPow - C_FINE_STEP * FINE_STEP_WIDTH,
            dblCurrentCPow + C_FINE_STEP * FINE_STEP_WIDTH,
            C_FINE_STEP);
        GammaValues = GetList(
            dblCurrentGammaPow - G_FINE_STEP * FINE_STEP_WIDTH,
            dblCurrentGammaPow + G_FINE_STEP * FINE_STEP_WIDTH,
            G_FINE_STEP);

        // Fine pass around the coarse optimum.
        DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);
    }
    finally
    {
        if (output != null)
        {
            output.Close();
        }
    }
}
/// <summary>
/// Grid parameter selection over the supplied C and Gamma lists using the default
/// number of folds (<c>NFOLD</c>). Delegates to the overload that takes an explicit
/// fold count.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use when optimizing</param>
/// <param name="CValues">The set of C values to use</param>
/// <param name="GammaValues">The set of Gamma values to use</param>
/// <param name="outputFile">Output file for the parameter results.</param>
/// <param name="C">The optimal C value will be put into this variable</param>
/// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
public void Grid(
    Problem problem,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    string outputFile,
    out double C,
    out double Gamma)
{
    Grid(
        problem,
        parameters,
        CValues,
        GammaValues,
        outputFile,
        NFOLD,
        out C,
        out Gamma);
}
/// <summary>
/// Grid parameter selection using the default C and Gamma ranges and the default
/// number of folds (<c>NFOLD</c>). The candidate lists are built from the
/// <c>MIN_*</c>, <c>MAX_*</c> and <c>*_STEP</c> constants and passed to the full overload.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use when optimizing</param>
/// <param name="outputFile">Output file for the parameter results.</param>
/// <param name="C">The optimal C value will be put into this variable</param>
/// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
public void Grid(
    Problem problem,
    Parameter parameters,
    string outputFile,
    out double C,
    out double Gamma)
{
    List<double> defaultCs = GetList(MIN_C, MAX_C, C_STEP);
    List<double> defaultGammas = GetList(MIN_G, MAX_G, G_STEP);

    Grid(problem, parameters, defaultCs, defaultGammas, outputFile, NFOLD, out C, out Gamma);
}
/// <summary>
/// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
/// combination which performed best. Each combination is trained on <paramref name="problem"/> and
/// scored on <paramref name="validation"/>.
/// </summary>
/// <remarks>
/// Every evaluation passes "tmp.txt" to the prediction routine as its output file.
/// </remarks>
/// <param name="problem">The training data</param>
/// <param name="validation">The validation data</param>
/// <param name="parameters">The parameters to use when optimizing; C and Gamma are overwritten for each pair</param>
/// <param name="CValues">The C values to use</param>
/// <param name="GammaValues">The Gamma values to use</param>
/// <param name="outputFile">The output file for the parameter results; may be null to skip file output</param>
/// <param name="C">The optimal C value will be placed in this variable</param>
/// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
public static void Grid(
    Problem problem,
    Problem validation,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    string outputFile,
    out double C,
    out double Gamma)
{
    C = 0;
    Gamma = 0;
    double maxScore = double.MinValue;

    StreamWriter output = null;
    if (outputFile != null)
    {
        output = new StreamWriter(outputFile);
    }

    // try/finally so the results file is closed even when training or prediction throws.
    try
    {
        for (int i = 0; i < CValues.Count; i++)
        {
            for (int j = 0; j < GammaValues.Count; j++)
            {
                parameters.C = CValues[i];
                parameters.Gamma = GammaValues[j];

                Model model = Training.Train(problem, parameters);
                double test = Prediction.Predict(validation, "tmp.txt", model, false);

                Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                if (output != null)
                {
                    output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                }

                if (test > maxScore)
                {
                    C = parameters.C;
                    Gamma = parameters.Gamma;
                    maxScore = test;
                    Console.WriteLine(" New Maximum!");
                }
                else
                {
                    Console.WriteLine();
                }
            }
        }
    }
    finally
    {
        if (output != null)
        {
            output.Close();
        }
    }
}
/// <summary>
/// Validates the parameters against the training data and, when they are
/// acceptable, trains a model.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use</param>
/// <returns>A trained SVM Model</returns>
/// <exception cref="Exception">
/// Thrown with the message produced by the parameter check when it reports an error.
/// </exception>
public static Model Train(Problem problem, Parameter parameters)
{
    string validationMessage = Procedures.svm_check_parameter(problem, parameters);
    if (validationMessage != null)
    {
        throw new Exception(validationMessage);
    }

    return Procedures.svm_train(problem, parameters);
}
/// <summary>
/// Parses the training command line: a run of "-x value" option pairs followed by
/// the input file name and an optional model file name.
/// </summary>
/// <param name="args">The raw command-line arguments</param>
/// <param name="parameters">Receives the parameters built from the options</param>
/// <param name="problem">Receives the problem read from the input file</param>
/// <param name="crossValidation">Set to true when the -v option is present</param>
/// <param name="nrfold">Receives the fold count given with -v (0 when absent)</param>
/// <param name="modelFilename">
/// Receives the model file name: the argument after the input file when present,
/// otherwise the input file name (text after the last '/') with ".model" appended
/// </param>
/// <exception cref="ArgumentException">
/// Thrown for an unknown or malformed option, an option without a value, a fold
/// count below 2, or a missing input file.
/// </exception>
private static void parseCommandLine(
    string[] args,
    out Parameter parameters,
    out Problem problem,
    out bool crossValidation,
    out int nrfold,
    out string modelFilename)
{
    int i;
    parameters = new Parameter();

    // default values
    crossValidation = false;
    nrfold = 0;

    // parse options
    for (i = 0; i < args.Length; i++)
    {
        string option = args[i];

        // The first argument that is not an option is the input file.
        // An empty argument is treated the same way instead of being indexed.
        if (!option.StartsWith("-", StringComparison.Ordinal))
        {
            break;
        }

        // A bare "-" has no option letter to dispatch on.
        if (option.Length < 2)
        {
            throw new ArgumentException("Unknown Parameter");
        }

        // Every option consumes the following argument as its value.
        ++i;
        if (i >= args.Length)
        {
            throw new ArgumentException("Missing value for option " + option);
        }

        string value = args[i];
        switch (option[1])
        {
            case 's':
                parameters.SvmType = (SvmType)int.Parse(value);
                break;
            case 't':
                parameters.KernelType = (KernelType)int.Parse(value);
                break;
            case 'd':
                parameters.Degree = int.Parse(value);
                break;
            case 'g':
                parameters.Gamma = double.Parse(value);
                break;
            case 'r':
                parameters.Coefficient0 = double.Parse(value);
                break;
            case 'n':
                parameters.Nu = double.Parse(value);
                break;
            case 'm':
                parameters.CacheSize = double.Parse(value);
                break;
            case 'c':
                parameters.C = double.Parse(value);
                break;
            case 'e':
                parameters.EPS = double.Parse(value);
                break;
            case 'p':
                parameters.P = double.Parse(value);
                break;
            case 'h':
                parameters.Shrinking = int.Parse(value) == 1;
                break;
            case 'b':
                parameters.Probability = int.Parse(value) == 1;
                break;
            case 'v':
                crossValidation = true;
                nrfold = int.Parse(value);
                if (nrfold < 2)
                {
                    throw new ArgumentException("n-fold cross validation: n must >= 2");
                }
                break;
            case 'w':
                // "-w<label> <weight>": the label follows the option letter and the
                // weight is this option's own value, not the fixed second argument.
                parameters.Weights[int.Parse(option.Substring(2))] = double.Parse(value);
                break;
            default:
                throw new ArgumentException("Unknown Parameter");
        }
    }

    // determine filenames
    if (i >= args.Length)
    {
        throw new ArgumentException("No input file specified");
    }

    problem = Problem.Read(args[i]);

    // A Gamma of 0 means "not set": default to 1 / number of features.
    if (parameters.Gamma == 0)
    {
        parameters.Gamma = 1.0 / problem.MaxIndex;
    }

    if (i < args.Length - 1)
    {
        modelFilename = args[i + 1];
    }
    else
    {
        int p = args[i].LastIndexOf('/') + 1;
        modelFilename = args[i].Substring(p) + ".model";
    }
}
/// <summary>
/// Runs cross validation and condenses the per-sample predictions into one score.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to evaluate</param>
/// <param name="nr_fold">The number of folds</param>
/// <returns>
/// For the two regression types (EPSILON_SVR, NU_SVR), the correlation coefficient
/// between predicted and actual targets; for every other type, the fraction of
/// samples whose prediction equals the label exactly.
/// </returns>
private static double doCrossValidation(Problem problem, Parameter parameters, int nr_fold)
{
    double[] predicted = new double[problem.Count];
    Procedures.svm_cross_validation(problem, parameters, nr_fold, predicted);

    bool isRegression = parameters.SvmType == SvmType.EPSILON_SVR
        || parameters.SvmType == SvmType.NU_SVR;

    if (!isRegression)
    {
        // Classification: count exact label matches.
        int hits = 0;
        for (int k = 0; k < problem.Count; k++)
        {
            if (predicted[k] == problem.Y[k])
            {
                ++hits;
            }
        }

        return (double)hits / problem.Count;
    }

    // Regression: accumulate the sums needed for the correlation coefficient.
    double sumPred = 0, sumActual = 0, sumPredSq = 0, sumActualSq = 0, sumCross = 0;
    for (int k = 0; k < problem.Count; k++)
    {
        double actual = problem.Y[k];
        double guess = predicted[k];
        sumPred += guess;
        sumActual += actual;
        sumPredSq += guess * guess;
        sumActualSq += actual * actual;
        sumCross += guess * actual;
    }

    double covariance = problem.Count * sumCross - sumPred * sumActual;
    double spread = Math.Sqrt(problem.Count * sumPredSq - sumPred * sumPred)
        * Math.Sqrt(problem.Count * sumActualSq - sumActual * sumActual);
    return covariance / spread;
}
/// <summary>
/// Builds the Range transform for a problem using <c>DEFAULT_LOWER_BOUND</c> and
/// <c>DEFAULT_UPPER_BOUND</c> as the target range.
/// </summary>
/// <param name="prob">The Problem to analyze</param>
/// <returns>The Range transform for the problem</returns>
public static RangeTransform Compute(Problem prob)
{
    RangeTransform transform = Compute(prob, DEFAULT_LOWER_BOUND, DEFAULT_UPPER_BOUND);
    return transform;
}
/// <summary>
/// Scans a problem for the minimum and maximum value of every feature and builds
/// a Range transform that maps them onto the requested bounds.
/// </summary>
/// <param name="prob">The Problem to analyze</param>
/// <param name="lowerBound">The lower bound for scaling</param>
/// <param name="upperBound">The upper bound for scaling</param>
/// <returns>The Range transform for the problem</returns>
public static RangeTransform Compute(Problem prob, double lowerBound, double upperBound)
{
    double[] minVals = new double[prob.MaxIndex];
    double[] maxVals = new double[prob.MaxIndex];

    // Seed each slot with the opposite extreme so any observed value replaces it.
    for (int feature = 0; feature < prob.MaxIndex; feature++)
    {
        minVals[feature] = double.MaxValue;
        maxVals[feature] = double.MinValue;
    }

    for (int row = 0; row < prob.Count; row++)
    {
        Node[] sample = prob.X[row];
        for (int col = 0; col < sample.Length; col++)
        {
            // Node indices are 1-based, the arrays are 0-based.
            int slot = sample[col].Index - 1;
            double observed = sample[col].Value;
            minVals[slot] = Math.Min(minVals[slot], observed);
            maxVals[slot] = Math.Max(maxVals[slot], observed);
        }
    }

    // A slot still holding a seed value is reset to a 0..0 range.
    for (int feature = 0; feature < prob.MaxIndex; feature++)
    {
        bool hasRange = minVals[feature] != double.MaxValue && maxVals[feature] != double.MinValue;
        if (!hasRange)
        {
            minVals[feature] = 0;
            maxVals[feature] = 0;
        }
    }

    return new RangeTransform(minVals, maxVals, lowerBound, upperBound);
}
/// <summary>
/// Writes a problem to a stream, one sample per line: the label followed by
/// space-separated "index:value" pairs.
/// </summary>
/// <remarks>
/// The stream is flushed but left open; closing it is the caller's job.
/// </remarks>
/// <param name="stream">The stream to write the problem to.</param>
/// <param name="problem">The problem to write.</param>
public static void Write(Stream stream, Problem problem)
{
    TemporaryCulture.Start();

    // try/finally so the Start() above is always paired with Stop(), even when a write throws.
    try
    {
        // The writer is deliberately not disposed: disposing it would close the caller's stream.
        StreamWriter output = new StreamWriter(stream);
        for (int i = 0; i < problem.Count; i++)
        {
            output.Write(problem.Y[i]);
            for (int j = 0; j < problem.X[i].Length; j++)
            {
                output.Write(" {0}:{1}", problem.X[i][j].Index, problem.X[i][j].Value);
            }

            output.WriteLine();
        }

        output.Flush();
    }
    finally
    {
        TemporaryCulture.Stop();
    }
}
/// <summary>
/// Predicts the class memberships of all the vectors in the problem.
/// </summary>
/// <param name="problem">The SVM Problem to solve</param>
/// <param name="outputFile">File for result output; may be null to skip file output</param>
/// <param name="model">The Model to use</param>
/// <param name="predict_probability">Whether to output a distribution over the classes</param>
/// <returns>
/// The fraction of vectors whose predicted value equals the target exactly
/// (NaN when the problem contains no vectors).
/// </returns>
public static double Predict(
    Problem problem,
    string outputFile,
    Model model,
    bool predict_probability)
{
    int correct = 0;
    int total = 0;

    StreamWriter output = outputFile != null ? new StreamWriter(outputFile) : null;

    // try/finally so the output file is closed even when prediction throws.
    try
    {
        SvmType svm_type = Procedures.svm_get_svm_type(model);
        int nr_class = Procedures.svm_get_nr_class(model);
        int[] labels = new int[nr_class];
        double[] prob_estimates = null;

        bool isRegression = svm_type == SvmType.EPSILON_SVR || svm_type == SvmType.NU_SVR;

        if (predict_probability && isRegression)
        {
            Console.WriteLine("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=" + Procedures.svm_get_svr_probability(model));
        }
        else
        {
            // Every other case writes the label header line.
            Procedures.svm_get_labels(model, labels);
            if (predict_probability)
            {
                prob_estimates = new double[nr_class];
            }

            if (output != null)
            {
                output.Write("labels");
                for (int j = 0; j < nr_class; j++)
                {
                    output.Write(" " + labels[j]);
                }

                output.Write("\n");
            }
        }

        // Probability estimates are only requested for the two classification types.
        bool useProbability = predict_probability
            && (svm_type == SvmType.C_SVC || svm_type == SvmType.NU_SVC);

        for (int i = 0; i < problem.Count; i++)
        {
            double target = problem.Y[i];
            Node[] x = problem.X[i];
            double v;

            if (useProbability)
            {
                v = Procedures.svm_predict_probability(model, x, prob_estimates);
                if (output != null)
                {
                    output.Write(target + " " + v + " ");
                    for (int j = 0; j < nr_class; j++)
                    {
                        output.Write(prob_estimates[j] + " ");
                    }

                    output.Write("\n");
                }
            }
            else
            {
                v = Procedures.svm_predict(model, x);
                if (output != null)
                {
                    output.Write(target + " " + v + "\n");
                }
            }

            if (v == target)
            {
                ++correct;
            }

            ++total;
        }

        return (double)correct / total;
    }
    finally
    {
        if (output != null)
        {
            output.Close();
        }
    }
}
/// <summary>
/// Saves a problem to the named file, creating it or replacing any existing
/// content, and closes the file afterwards whether or not writing succeeds.
/// </summary>
/// <param name="filename">The file to write to</param>
/// <param name="problem">The problem to write</param>
public static void Write(string filename, Problem problem)
{
    using (FileStream fileStream = File.Open(filename, FileMode.Create))
    {
        Write(fileStream, problem);
    }
}
/// <summary>
/// Builds the Range transform for a problem using <c>DEFAULT_LOWER_BOUND</c> and
/// <c>DEFAULT_UPPER_BOUND</c> as the target range.
/// </summary>
/// <param name="prob">The Problem to analyze</param>
/// <returns>The Range transform for the problem</returns>
public static RangeTransform Compute(Problem prob)
{
    RangeTransform transform = Compute(prob, DEFAULT_LOWER_BOUND, DEFAULT_UPPER_BOUND);
    return transform;
}