/// <summary>
/// Performs cross validation.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to test</param>
/// <param name="nrfold">The number of cross validations to use</param>
/// <returns>The cross validation score</returns>
/// <exception cref="ArgumentException">Thrown when the parameter set is invalid for the given problem.</exception>
public static double PerformCrossValidation(Problem problem, Parameter parameters, int nrfold)
{
    // svm_check_parameter returns null on success, or an error description.
    string error = Procedures.svm_check_parameter(problem, parameters);
    if (error != null)
        // ArgumentException is more specific than the bare Exception thrown
        // previously, and is still caught by existing catch (Exception) blocks.
        throw new ArgumentException(error);
    return doCrossValidation(problem, parameters, nrfold);
}
/// <summary>
/// Constructor. Precomputes the kernel value for every (row, column) node pair.
/// </summary>
/// <param name="rows">Nodes to use as the rows of the matrix</param>
/// <param name="columns">Nodes to use as the columns of the matrix</param>
/// <param name="param">Parameters to use when compute similarities</param>
public PrecomputedKernel(List<Node[]> rows, List<Node[]> columns, Parameter param)
{
    _rows = rows.Count;
    _columns = columns.Count;
    _similarities = new float[_rows, _columns];

    // Evaluate the kernel for each row/column combination and store it.
    for (int rowIndex = 0; rowIndex < _rows; rowIndex++)
    {
        for (int colIndex = 0; colIndex < _columns; colIndex++)
        {
            double similarity = Kernel.KernelFunction(rows[rowIndex], columns[colIndex], param);
            _similarities[rowIndex, colIndex] = (float)similarity;
        }
    }
}
/// <summary>
/// Constructor. Builds the full self-similarity matrix for a single node set,
/// exploiting symmetry so each off-diagonal kernel value is computed once.
/// </summary>
/// <param name="nodes">Nodes for self-similarity analysis</param>
/// <param name="param">Parameters to use when computing similarities</param>
public PrecomputedKernel(List<Node[]> nodes, Parameter param)
{
    _rows = nodes.Count;
    _columns = _rows;
    _similarities = new float[_rows, _columns];

    for (int row = 0; row < _rows; row++)
    {
        // Symmetry: the lower triangle mirrors values computed in earlier rows.
        for (int col = 0; col < row; col++)
            _similarities[row, col] = _similarities[col, row];

        // NOTE(review): the diagonal is hard-coded to 1, i.e. K(x, x) == 1.
        // That holds for RBF kernels but not linear/polynomial — confirm the
        // intended kernel types for this class.
        _similarities[row, row] = 1;

        // Compute the strictly-upper triangle explicitly.
        for (int col = row + 1; col < _columns; col++)
            _similarities[row, col] = (float)Kernel.KernelFunction(nodes[row], nodes[col], param);
    }
}
/// <summary>
/// Constructor. Caches the kernel configuration and a shallow copy of the data;
/// for RBF kernels, also precomputes each vector's self dot-product.
/// </summary>
/// <param name="l">Number of training vectors</param>
/// <param name="x_">The training vectors</param>
/// <param name="param">Kernel configuration to copy settings from</param>
public Kernel(int l, Node[][] x_, Parameter param)
{
    _kernelType = param.KernelType;
    _degree = param.Degree;
    _gamma = param.Gamma;
    _coef0 = param.Coefficient0;

    _x = (Node[][])x_.Clone();

    if (_kernelType != KernelType.RBF)
    {
        // Only the RBF kernel needs the cached self dot-products.
        _xSquare = null;
    }
    else
    {
        _xSquare = new double[l];
        for (int index = 0; index < l; index++)
            _xSquare[index] = dot(_x[index], _x[index]);
    }
}
/// <summary>
/// Evaluates the kernel function specified in <paramref name="param"/> for a pair of vectors.
/// </summary>
/// <param name="x">First vector</param>
/// <param name="y">Second vector</param>
/// <param name="param">Kernel configuration (type, Gamma, Degree, Coefficient0)</param>
/// <returns>The kernel value K(x, y); 0 for an unrecognized kernel type</returns>
public static double KernelFunction(Node[] x, Node[] y, Parameter param)
{
    switch (param.KernelType)
    {
        case KernelType.LINEAR:
            return dot(x, y);
        case KernelType.POLY:
            // Standard libsvm polynomial kernel: (gamma * <x,y> + coef0)^degree.
            // The previous code multiplied the dot product by Degree instead of
            // Gamma, which is not the libsvm polynomial kernel.
            return powi(param.Gamma * dot(x, y) + param.Coefficient0, param.Degree);
        case KernelType.RBF:
            {
                double sum = computeSquaredDistance(x, y);
                return Math.Exp(-param.Gamma * sum);
            }
        case KernelType.SIGMOID:
            return Math.Tanh(param.Gamma * dot(x, y) + param.Coefficient0);
        case KernelType.PRECOMPUTED:
            // For precomputed kernels, y[0].Value is the index into x's row.
            return x[(int)(y[0].Value)].Value;
        default:
            return 0;
    }
}
/// <summary>
/// Runs one grid-search pass over the cross product of the supplied C and Gamma
/// values, updating the best (C, Gamma, score) triple found so far via the ref
/// parameters. Tested values are recorded in _Cs/_Gammas so a later (fine) pass
/// can skip already-explored combinations.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">Parameter object mutated with each candidate pair</param>
/// <param name="CValues">Candidate C values</param>
/// <param name="GammaValues">Candidate Gamma values</param>
/// <param name="output">Optional writer receiving one "C Gamma score" line per tested pair; may be null</param>
/// <param name="nrfold">Number of folds for cross validation</param>
/// <param name="C">Best C found so far (updated in place)</param>
/// <param name="Gamma">Best Gamma found so far (updated in place)</param>
/// <param name="crossValidation">Best cross-validation score so far (updated in place)</param>
private void DoSearch(
    Problem problem,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    StreamWriter output,
    int nrfold,
    ref double C,
    ref double Gamma,
    ref double crossValidation)
{
    for (int i = 0; i < CValues.Count; i++)
    {
        for (int j = 0; j < GammaValues.Count; j++)
        {
            // Skip a pair only when BOTH its C and Gamma were fully recorded by
            // an earlier pass; otherwise evaluate it.
            if (!_Cs.Contains(CValues[i]) || !_Gammas.Contains(GammaValues[j]))
            {
                parameters.C = CValues[i];
                parameters.Gamma = GammaValues[j];
                double test = Training.PerformCrossValidation(problem, parameters, nrfold);
                Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                if (output != null)
                    output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                if (test > crossValidation)
                {
                    // New best score: remember this parameter pair.
                    C = parameters.C;
                    Gamma = parameters.Gamma;
                    crossValidation = test;
                    Console.WriteLine(" New Maximum!");
                }
                else
                    Console.WriteLine();
                // NOTE(review): each tested Gamma is appended here (duplicates
                // accumulate in the List); combined with the ||-condition above
                // this only skips pairs whose values were both seen before.
                _Gammas.Add(GammaValues[j]);
            }
            // Progress accounting runs for every grid cell, skipped or not.
            _crossIterations++;
            OnEndEpoch();
        }
        _Cs.Add(CValues[i]);
    }
}
/// <summary>
/// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
/// combination which performed best. Use this method if validation data isn't available, as it will
/// divide the training data and train on a portion of it and test on the rest.
/// A coarse pass over the supplied lists is followed by a fine pass centered
/// (in log2 space) on the best coarse result.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use when optimizing</param>
/// <param name="CValues">The set of C values to use</param>
/// <param name="GammaValues">The set of Gamma values to use</param>
/// <param name="outputFile">Output file for the parameter results.</param>
/// <param name="nrfold">The number of times the data should be divided for validation</param>
/// <param name="C">The optimal C value will be placed in this variable</param>
/// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
public void Grid(
    Problem problem,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    string outputFile,
    int nrfold,
    out double C,
    out double Gamma)
{
    C = 0;
    Gamma = 0;
    double crossValidation = double.MinValue;
    StreamWriter output = null;
    if (outputFile != null)
        output = new StreamWriter(outputFile);
    try
    {
        _Cs = new List<double>();
        _Gammas = new List<double>();
        // Total grid cells (coarse + fine) used for progress reporting.
        _cycles = CValues.Count * GammaValues.Count + (int)Math.Pow(FINE_STEP_WIDTH * 2 + 1, 2);
        _crossIterations = 0;

        // Coarse pass over the caller-supplied value lists.
        DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);

        // Recompute the value lists for the fine search, centered in log2 space
        // on the best coarse result.
        double dblCurrentCPow = Math.Log(C, 2);
        double dblCurrentGammaPow = Math.Log(Gamma, 2);
        CValues = GetList(dblCurrentCPow - C_FINE_STEP * FINE_STEP_WIDTH, dblCurrentCPow + C_FINE_STEP * FINE_STEP_WIDTH, C_FINE_STEP);
        GammaValues = GetList(dblCurrentGammaPow - G_FINE_STEP * FINE_STEP_WIDTH, dblCurrentGammaPow + G_FINE_STEP * FINE_STEP_WIDTH, G_FINE_STEP);
        DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);
    }
    finally
    {
        // Previously the writer leaked if cross validation threw; always close.
        if (output != null)
            output.Close();
    }
}
/// <summary>
/// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
/// combination which performed best. Use this method if there is no validation data available, and it will
/// divide it 5 times to allow 5-fold validation (training on 4/5 and validating on 1/5, 5 times).
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use when optimizing</param>
/// <param name="CValues">The set of C values to use</param>
/// <param name="GammaValues">The set of Gamma values to use</param>
/// <param name="outputFile">Output file for the parameter results.</param>
/// <param name="C">The optimal C value will be put into this variable</param>
/// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
public void Grid(
    Problem problem,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    string outputFile,
    out double C,
    out double Gamma)
{
    // Delegate to the full overload, supplying the configured fold count.
    Grid(problem, parameters, CValues, GammaValues, outputFile, NFOLD, out C, out Gamma);
}
/// <summary>
/// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
/// combination which performed best. The default ranges of C and Gamma values are used. Use this method if there is no validation data available, and it will
/// divide it 5 times to allow 5-fold validation (training on 4/5 and validating on 1/5, 5 times).
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use when optimizing</param>
/// <param name="outputFile">Output file for the parameter results.</param>
/// <param name="C">The optimal C value will be put into this variable</param>
/// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
public void Grid(
    Problem problem,
    Parameter parameters,
    string outputFile,
    out double C,
    out double Gamma)
{
    // Build the default coarse search ranges, then delegate to the full overload.
    List<double> defaultCValues = GetList(MIN_C, MAX_C, C_STEP);
    List<double> defaultGammaValues = GetList(MIN_G, MAX_G, G_STEP);
    Grid(problem, parameters, defaultCValues, defaultGammaValues, outputFile, NFOLD, out C, out Gamma);
}
/// <summary>
/// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
/// combination which performed best.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="validation">The validation data</param>
/// <param name="parameters">The parameters to use when optimizing</param>
/// <param name="CValues">The C values to use</param>
/// <param name="GammaValues">The Gamma values to use</param>
/// <param name="outputFile">The output file for the parameter results</param>
/// <param name="C">The optimal C value will be placed in this variable</param>
/// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
public static void Grid(
    Problem problem,
    Problem validation,
    Parameter parameters,
    List<double> CValues,
    List<double> GammaValues,
    string outputFile,
    out double C,
    out double Gamma)
{
    C = 0;
    Gamma = 0;
    double maxScore = double.MinValue;
    StreamWriter output = null;
    if (outputFile != null)
        output = new StreamWriter(outputFile);
    try
    {
        for (int i = 0; i < CValues.Count; i++)
        {
            for (int j = 0; j < GammaValues.Count; j++)
            {
                parameters.C = CValues[i];
                parameters.Gamma = GammaValues[j];
                // Train on the full training set and score against the held-out
                // validation set; predictions are written to a scratch file.
                Model model = Training.Train(problem, parameters);
                double test = Prediction.Predict(validation, "tmp.txt", model, false);
                Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                if (output != null)
                    output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                if (test > maxScore)
                {
                    C = parameters.C;
                    Gamma = parameters.Gamma;
                    maxScore = test;
                    Console.WriteLine(" New Maximum!");
                }
                else
                    Console.WriteLine();
            }
        }
    }
    finally
    {
        // Previously the writer leaked if Train/Predict threw; always close.
        if (output != null)
            output.Close();
    }
}
/// <summary>
/// SVM training routine (summary translated from Vietnamese). Reads the
/// training file, obtains C/Gamma either by grid search or from the UI text
/// boxes, trains a model, and writes it next to the training file.
/// </summary>
/// <param name="isBatchMode">When true the training file path comes from the
/// _trainFilePath field; otherwise from the tbxTrainFilePath text box.</param>
private void TrainSVM(bool isBatchMode)
{
    string strTrainFile = null;
    if (isBatchMode)
    {
        strTrainFile = _trainFilePath;
    }
    else
    {
        strTrainFile = tbxTrainFilePath.Text;
    }
    // Derive output paths from everything up to (and including) the last '_'
    // in the training file name — assumes the name contains an underscore;
    // TODO confirm callers guarantee this, otherwise Remove(0) drops the name.
    int iPos = strTrainFile.LastIndexOf('_');
    string strMutualPath = strTrainFile.Remove(iPos + 1);
    string strModelFile = strMutualPath + "model.txt";
    Problem prob = Problem.Read(strTrainFile);
    Parameter param = new Parameter();
    if (cmbModelSelection.SelectedItem.ToString() == "Grid search")
    {
        string strLogFile = strMutualPath + "Grid.txt";
        double dblC;
        double dblGamma;
        ParameterSelection paramSel = new ParameterSelection();
        paramSel.NFOLD = Int32.Parse(tbxNumFold.Text);
        // Progress callback fired per tested parameter pair; DoEvents keeps
        // the UI responsive while the synchronous grid search runs.
        paramSel.EndEpochEvent += new CrossEpochEventHandler(
            delegate(object senderNetwork, CrossEpochEventArgs args)
            {
                tlsProgressBar.Value = (int)(args.TrainingIteration * 100d / args.Cycles);
                tlsStatus.Text = "Current parameter set: " + args.TrainingIteration;
                Application.DoEvents();
            });
        paramSel.Grid(prob, param, strLogFile, out dblC, out dblGamma);
        // Train the final model with the best parameters found.
        param.C = dblC;
        param.Gamma = dblGamma;
        param.Probability = ckbProbEstimate.Checked;
        Model model = Training.Train(prob, param);
        Model.Write(strModelFile, model);
        tlsProgressBar.Value = 0;
    }
    else if (cmbModelSelection.SelectedItem.ToString() == "Use default values")
    {
        // Manual mode: C and Gamma are taken directly from the text boxes.
        if (tbxC.Text == "" || tbxGamma.Text == "")
        {
            MessageBox.Show("Please fill in parameters!");
            return;
        }
        param.C = double.Parse(tbxC.Text);
        param.Gamma = double.Parse(tbxGamma.Text);
        param.Probability = ckbProbEstimate.Checked;
        Model model = Training.Train(prob, param);
        Model.Write(strModelFile, model);
    }
}
/// <summary>
/// K-SVMeans training routine (summary translated from Vietnamese): clusters
/// the training data first, then trains one SVM model per cluster, writing a
/// cluster model, per-cluster data files, and per-cluster SVM model files.
/// </summary>
/// <param name="isBatchMode">When true the training file path comes from the
/// _trainFilePath field; otherwise from the tbxTrainFilePath text box.</param>
private void TrainKSVMeans(bool isBatchMode)
{
    string strTrainFile = null;
    if (isBatchMode)
    {
        strTrainFile = _trainFilePath;
    }
    else
    {
        strTrainFile = tbxTrainFilePath.Text;
    }
    int iNumCluster = (int)nmNumCluster.Value;
    // Derive output paths from everything up to (and including) the last '_'
    // in the training file name.
    int iPos = strTrainFile.LastIndexOf('_');
    string strMutualPath = strTrainFile.Remove(iPos + 1);
    // NOTE(review): strMutualPath already ends with '_', so this produces a
    // double underscore ("__clusterModel.txt") unlike the other file names —
    // confirm whether this is intentional.
    string strClusterModelFile = strMutualPath + "_clusterModel.txt";
    string[] strClusterResultFiles = new string[iNumCluster];
    string[] strSVMModelFiles = new string[iNumCluster];
    for (int i = 0; i < iNumCluster; i++)
    {
        strClusterResultFiles[i] = strMutualPath + "cluster" + (i + 1).ToString() + ".txt";
        strSVMModelFiles[i] = strMutualPath + "model" + (i + 1).ToString() + ".txt";
    }
    // Run clustering (comment translated from Vietnamese).
    SampleDataBUS samDataBUS = new SampleDataBUS();
    samDataBUS.Read(strTrainFile);
    Clustering clustering = new Clustering(iNumCluster, samDataBUS.Samples, DistanceType.Manhattan);
    clustering.Run(strClusterModelFile, false);
    // Split the samples into one data file per cluster.
    samDataBUS.WriteIntoCluster(strClusterResultFiles, clustering.SampleData.ClusterIndices);
    // Train an SVM per cluster (comment translated from Vietnamese).
    int iProgressBaseline = 0;
    for (int i = 0; i < iNumCluster; i++)
    {
        Problem prob = Problem.Read(strClusterResultFiles[i]);
        Parameter param = new Parameter();
        // Progress bar offset for the current cluster's share of the work.
        iProgressBaseline = i * 100 / iNumCluster;
        if (cmbModelSelection.SelectedItem.ToString() == "Grid search")
        {
            string strLogFile = strMutualPath + "GridCluster" + (i + 1).ToString() + ".txt";
            double dblC;
            double dblGamma;
            ParameterSelection paramSel = new ParameterSelection();
            paramSel.NFOLD = Int32.Parse(tbxNumFold.Text);
            // The delegate captures loop variables i and iProgressBaseline; it
            // only fires during the Grid call within this same iteration, so
            // the captured values are current when used.
            paramSel.EndEpochEvent += new CrossEpochEventHandler(
                delegate(object senderNetwork, CrossEpochEventArgs args)
                {
                    tlsProgressBar.Value = iProgressBaseline + (int)(args.TrainingIteration * 100d / (args.Cycles * iNumCluster));
                    tlsStatus.Text = "Cluster: " + (i + 1) + " | Current parameter set: " + args.TrainingIteration;
                    Application.DoEvents();
                });
            paramSel.Grid(prob, param, strLogFile, out dblC, out dblGamma);
            // Train this cluster's model with the best parameters found.
            param.C = dblC;
            param.Gamma = dblGamma;
            param.Probability = ckbProbEstimate.Checked;
            Model model = Training.Train(prob, param);
            Model.Write(strSVMModelFiles[i], model);
        }
        else if (cmbModelSelection.SelectedItem.ToString() == "Use default values")
        {
            // Manual mode: C and Gamma are taken directly from the text boxes.
            if (tbxC.Text == "" || tbxGamma.Text == "")
            {
                MessageBox.Show("Please fill in parameters!");
                return;
            }
            param.C = double.Parse(tbxC.Text);
            param.Gamma = double.Parse(tbxGamma.Text);
            param.Probability = ckbProbEstimate.Checked;
            Model model = Training.Train(prob, param);
            Model.Write(strSVMModelFiles[i], model);
        }
    }
    tlsProgressBar.Value = 0;
}
/// <summary>
/// Reads a Model from the provided stream.
/// </summary>
/// <param name="stream">The stream from which to read the Model.</param>
/// <returns>the Model</returns>
public static Model Read(Stream stream)
{
    // Force a culture-invariant number format while parsing the model file.
    TemporaryCulture.Start();

    StreamReader input = new StreamReader(stream);

    // read parameters
    Model model = new Model();
    Parameter param = new Parameter();
    model.Parameter = param;
    model.Rho = null;
    model.PairwiseProbabilityA = null;
    model.PairwiseProbabilityB = null;
    model.ClassLabels = null;
    model.NumberOfSVPerClass = null;

    // Header section: one "keyword value" pair per line until the "SV" marker.
    // NOTE(review): ReadLine() returns null at EOF, so a truncated header
    // (no "SV" line) would throw NullReferenceException — confirm inputs are
    // always well-formed model files.
    bool headerFinished = false;
    while (!headerFinished)
    {
        string line = input.ReadLine();
        string cmd, arg;
        // Split the line into the keyword and its (possibly empty) argument.
        int splitIndex = line.IndexOf(' ');
        if (splitIndex >= 0)
        {
            cmd = line.Substring(0, splitIndex);
            arg = line.Substring(splitIndex + 1);
        }
        else
        {
            cmd = line;
            arg = "";
        }
        // Arguments are lower-cased for enum parsing; keywords are matched
        // case-sensitively (so "SV" terminates the header).
        arg = arg.ToLower();

        int i, n;
        switch (cmd)
        {
            case "svm_type":
                param.SvmType = (SvmType)Enum.Parse(typeof(SvmType), arg.ToUpper());
                break;
            case "kernel_type":
                param.KernelType = (KernelType)Enum.Parse(typeof(KernelType), arg.ToUpper());
                break;
            case "degree":
                param.Degree = int.Parse(arg);
                break;
            case "gamma":
                param.Gamma = double.Parse(arg);
                break;
            case "coef0":
                param.Coefficient0 = double.Parse(arg);
                break;
            case "nr_class":
                model.NumberOfClasses = int.Parse(arg);
                break;
            case "total_sv":
                model.SupportVectorCount = int.Parse(arg);
                break;
            case "rho":
                // One rho per pair of classes: k*(k-1)/2 values.
                n = model.NumberOfClasses * (model.NumberOfClasses - 1) / 2;
                model.Rho = new double[n];
                string[] rhoParts = arg.Split();
                for (i = 0; i < n; i++)
                    model.Rho[i] = double.Parse(rhoParts[i]);
                break;
            case "label":
                n = model.NumberOfClasses;
                model.ClassLabels = new int[n];
                string[] labelParts = arg.Split();
                for (i = 0; i < n; i++)
                    model.ClassLabels[i] = int.Parse(labelParts[i]);
                break;
            case "probA":
                // Pairwise probability parameters, k*(k-1)/2 values each.
                n = model.NumberOfClasses * (model.NumberOfClasses - 1) / 2;
                model.PairwiseProbabilityA = new double[n];
                string[] probAParts = arg.Split();
                for (i = 0; i < n; i++)
                    model.PairwiseProbabilityA[i] = double.Parse(probAParts[i]);
                break;
            case "probB":
                n = model.NumberOfClasses * (model.NumberOfClasses - 1) / 2;
                model.PairwiseProbabilityB = new double[n];
                string[] probBParts = arg.Split();
                for (i = 0; i < n; i++)
                    model.PairwiseProbabilityB[i] = double.Parse(probBParts[i]);
                break;
            case "nr_sv":
                // Support-vector count per class.
                n = model.NumberOfClasses;
                model.NumberOfSVPerClass = new int[n];
                string[] nrsvParts = arg.Split();
                for (i = 0; i < n; i++)
                    model.NumberOfSVPerClass[i] = int.Parse(nrsvParts[i]);
                break;
            case "SV":
                // Marks the start of the support-vector section.
                headerFinished = true;
                break;
            default:
                throw new Exception("Unknown text in model file");
        }
    }

    // read sv_coef and SV
    // Each SV line holds (k-1) coefficients followed by "index:value" nodes.
    int m = model.NumberOfClasses - 1;
    int l = model.SupportVectorCount;
    model.SupportVectorCoefficients = new double[m][];
    for (int i = 0; i < m; i++)
    {
        model.SupportVectorCoefficients[i] = new double[l];
    }
    model.SupportVectors = new Node[l][];

    for (int i = 0; i < l; i++)
    {
        string[] parts = input.ReadLine().Trim().Split();

        // First m tokens are the coefficients for this support vector.
        for (int k = 0; k < m; k++)
            model.SupportVectorCoefficients[k][i] = double.Parse(parts[k]);
        // Remaining tokens are sparse "index:value" feature nodes.
        int n = parts.Length - m;
        model.SupportVectors[i] = new Node[n];
        for (int j = 0; j < n; j++)
        {
            string[] nodeParts = parts[m + j].Split(':');
            model.SupportVectors[i][j] = new Node();
            model.SupportVectors[i][j].Index = int.Parse(nodeParts[0]);
            model.SupportVectors[i][j].Value = double.Parse(nodeParts[1]);
        }
    }

    // Restore the caller's culture.
    TemporaryCulture.Stop();

    return model;
}
/// <summary>
/// Trains a model using the provided training data and parameters.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The parameters to use</param>
/// <returns>A trained SVM Model</returns>
/// <exception cref="ArgumentException">Thrown when the parameter set is invalid for the given problem.</exception>
public static Model Train(Problem problem, Parameter parameters)
{
    // svm_check_parameter returns null on success, or an error description.
    string error = Procedures.svm_check_parameter(problem, parameters);
    if (error != null)
        // ArgumentException is more specific than the bare Exception thrown
        // previously, and is still caught by existing catch (Exception) blocks.
        throw new ArgumentException(error);
    return Procedures.svm_train(problem, parameters);
}
/// <summary>
/// Parses libsvm-style command-line options into a Parameter set, loads the
/// training problem, and determines the model output filename.
/// </summary>
/// <param name="args">Command-line arguments ("-x value" pairs followed by input [model] filenames)</param>
/// <param name="parameters">Receives the parsed parameters</param>
/// <param name="problem">Receives the training data read from the input file</param>
/// <param name="crossValidation">Receives true when "-v n" was supplied</param>
/// <param name="nrfold">Receives the fold count for "-v" (0 otherwise)</param>
/// <param name="modelFilename">Receives the model output filename</param>
/// <exception cref="ArgumentException">Unknown option, invalid fold count, or missing input file</exception>
private static void parseCommandLine(string[] args, out Parameter parameters, out Problem problem, out bool crossValidation, out int nrfold, out string modelFilename)
{
    int i;

    parameters = new Parameter();
    // default values
    crossValidation = false;
    nrfold = 0;

    // parse options: each "-x" is followed by its value in the next argument.
    for (i = 0; i < args.Length; i++)
    {
        if (args[i][0] != '-')
            break;
        ++i;
        switch (args[i - 1][1])
        {
            case 's':
                parameters.SvmType = (SvmType)int.Parse(args[i]);
                break;
            case 't':
                parameters.KernelType = (KernelType)int.Parse(args[i]);
                break;
            case 'd':
                parameters.Degree = int.Parse(args[i]);
                break;
            case 'g':
                parameters.Gamma = double.Parse(args[i]);
                break;
            case 'r':
                parameters.Coefficient0 = double.Parse(args[i]);
                break;
            case 'n':
                parameters.Nu = double.Parse(args[i]);
                break;
            case 'm':
                parameters.CacheSize = double.Parse(args[i]);
                break;
            case 'c':
                parameters.C = double.Parse(args[i]);
                break;
            case 'e':
                parameters.EPS = double.Parse(args[i]);
                break;
            case 'p':
                parameters.P = double.Parse(args[i]);
                break;
            case 'h':
                parameters.Shrinking = int.Parse(args[i]) == 1;
                break;
            case 'b':
                parameters.Probability = int.Parse(args[i]) == 1;
                break;
            case 'v':
                crossValidation = true;
                nrfold = int.Parse(args[i]);
                if (nrfold < 2)
                {
                    throw new ArgumentException("n-fold cross validation: n must >= 2");
                }
                break;
            case 'w':
                // Per-class weight: "-wN <weight>". The class label N comes from
                // the option text; the weight is the CURRENT value argument.
                // BUG FIX: the old code parsed double.Parse(args[1]) — the second
                // command-line token regardless of option position.
                parameters.Weights[int.Parse(args[i - 1].Substring(2))] = double.Parse(args[i]);
                break;
            default:
                throw new ArgumentException("Unknown Parameter");
        }
    }

    // determine filenames
    if (i >= args.Length)
        throw new ArgumentException("No input file specified");

    problem = Problem.Read(args[i]);

    // Default gamma is 1 / (max feature index), as in libsvm.
    if (parameters.Gamma == 0)
        parameters.Gamma = 1.0 / problem.MaxIndex;

    if (i < args.Length - 1)
        modelFilename = args[i + 1];
    else
    {
        // No explicit model file: derive it from the input file's base name.
        int p = args[i].LastIndexOf('/') + 1;
        modelFilename = args[i].Substring(p) + ".model";
    }
}
/// <summary>
/// Runs n-fold cross validation and scores the predictions: for regression
/// SVMs (EPSILON_SVR/NU_SVR) returns the Pearson correlation between predicted
/// and actual values; otherwise returns classification accuracy.
/// </summary>
/// <param name="problem">The training data</param>
/// <param name="parameters">The SVM parameters to evaluate</param>
/// <param name="nr_fold">Number of folds</param>
/// <returns>Correlation (regression) or accuracy in [0, 1] (classification)</returns>
private static double doCrossValidation(Problem problem, Parameter parameters, int nr_fold)
{
    double[] target = new double[problem.Count];
    Procedures.svm_cross_validation(problem, parameters, nr_fold, target);

    if (parameters.SvmType == SvmType.EPSILON_SVR || parameters.SvmType == SvmType.NU_SVR)
    {
        // Accumulate sums for the correlation coefficient. (The old code also
        // accumulated a total squared error that was never used — removed.)
        double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
        for (int i = 0; i < problem.Count; i++)
        {
            double y = problem.Y[i];
            double v = target[i];
            sumv += v;
            sumy += y;
            sumvv += v * v;
            sumyy += y * y;
            sumvy += v * y;
        }
        int n = problem.Count;
        // Pearson correlation between predictions and true values.
        return (n * sumvy - sumv * sumy) /
            (Math.Sqrt(n * sumvv - sumv * sumv) * Math.Sqrt(n * sumyy - sumy * sumy));
    }

    // Classification: fraction of exactly-matching predicted labels.
    int total_correct = 0;
    for (int i = 0; i < problem.Count; i++)
    {
        if (target[i] == problem.Y[i])
            ++total_correct;
    }
    return (double)total_correct / problem.Count;
}