Example #1
 /// <summary>
 /// Performs cross validation.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to test</param>
 /// <param name="nrfold">The number of cross validations to use</param>
 /// <returns>The cross validation score</returns>
 public static double PerformCrossValidation(Problem problem, Parameter parameters, int nrfold)
 {
     string error = Procedures.svm_check_parameter(problem, parameters);
     if (error == null)
         return doCrossValidation(problem, parameters, nrfold);
     else throw new Exception(error);
 }
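A quick way to exercise this method on its own, as a minimal sketch: the file name is hypothetical and the C/Gamma values are arbitrary placeholders, not recommendations.

 Problem problem = Problem.Read("train.txt");   // libsvm-format training data (hypothetical path)
 Parameter parameters = new Parameter();
 parameters.C = 1;
 parameters.Gamma = 0.5;
 double score = Training.PerformCrossValidation(problem, parameters, 5);   // 5-fold cross validation
 Console.WriteLine("Cross validation score: {0}", score);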
 /// <summary>
 /// Constructor.
 /// </summary>
 /// <param name="rows">Nodes to use as the rows of the matrix</param>
 /// <param name="columns">Nodes to use as the columns of the matrix</param>
 /// <param name="param">Parameters to use when computing similarities</param>
 public PrecomputedKernel(List<Node[]> rows, List<Node[]> columns, Parameter param)
 {
     _rows = rows.Count;
     _columns = columns.Count;
     _similarities = new float[_rows, _columns];
     for (int r = 0; r < _rows; r++)
         for (int c = 0; c < _columns; c++)
             _similarities[r, c] = (float)Kernel.KernelFunction(rows[r], columns[c], param);
 }
 /// <summary>
 /// Constructor.
 /// </summary>
 /// <param name="nodes">Nodes for self-similarity analysis</param>
 /// <param name="param">Parameters to use when computing similarities</param>
 public PrecomputedKernel(List<Node[]> nodes, Parameter param)
 {
     _rows = nodes.Count;
     _columns = _rows;
     _similarities = new float[_rows, _columns];
     for (int r = 0; r < _rows; r++)
     {
         for (int c = 0; c < r; c++)
             _similarities[r, c] = _similarities[c, r];
         _similarities[r, r] = 1;
         for (int c = r + 1; c < _columns; c++)
             _similarities[r, c] = (float)Kernel.KernelFunction(nodes[r], nodes[c], param);
     }
 }
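A minimal construction sketch for the self-similarity overload; the two feature vectors are made up, and Node is assumed to expose the parameterless constructor plus settable Index/Value seen in Example #13.

 Node[] a = new Node[] { new Node() { Index = 1, Value = 0.2 }, new Node() { Index = 2, Value = 0.8 } };
 Node[] b = new Node[] { new Node() { Index = 1, Value = 0.5 }, new Node() { Index = 2, Value = 0.1 } };
 List<Node[]> nodes = new List<Node[]>();
 nodes.Add(a);
 nodes.Add(b);
 // Produces a symmetric 2x2 matrix: 1s on the diagonal, K(a, b) in both off-diagonal cells.
 PrecomputedKernel kernel = new PrecomputedKernel(nodes, new Parameter());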
Example #4
        /// <summary>
        /// Constructor. Copies the kernel settings and the training vectors.
        /// </summary>
        public Kernel(int l, Node[][] x_, Parameter param)
        {
            _kernelType = param.KernelType;
            _degree = param.Degree;
            _gamma = param.Gamma;
            _coef0 = param.Coefficient0;

            _x = (Node[][])x_.Clone();

            if (_kernelType == KernelType.RBF)
            {
                // Cache dot(x, x) for each vector so ||x - y||^2 can later be formed as dot(x,x) + dot(y,y) - 2*dot(x,y).
                _xSquare = new double[l];
                for (int i = 0; i < l; i++)
                    _xSquare[i] = dot(_x[i], _x[i]);
            }
            else _xSquare = null;
        }
Example #5
 public static double KernelFunction(Node[] x, Node[] y, Parameter param)
 {
     switch (param.KernelType)
     {
         case KernelType.LINEAR:
             return dot(x, y);
         case KernelType.POLY:
             return powi(param.Gamma * dot(x, y) + param.Coefficient0, param.Degree);   // standard polynomial kernel: (gamma * dot(x, y) + coef0)^degree
         case KernelType.RBF:
             {
                 double sum = computeSquaredDistance(x, y);
                 return Math.Exp(-param.Gamma * sum);
             }
         case KernelType.SIGMOID:
             return Math.Tanh(param.Gamma * dot(x, y) + param.Coefficient0);
         case KernelType.PRECOMPUTED:
             return x[(int)(y[0].Value)].Value;
         default:
             return 0;
     }
 }
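A hand-checkable sketch of the RBF branch (illustrative values only, and assuming computeSquaredDistance returns ||x - y||^2 as its name suggests): for x = (1, 0) and y = (0, 1) the squared distance is 2, so the result should be exp(-Gamma * 2).

 Node[] x = new Node[] { new Node() { Index = 1, Value = 1 }, new Node() { Index = 2, Value = 0 } };
 Node[] y = new Node[] { new Node() { Index = 1, Value = 0 }, new Node() { Index = 2, Value = 1 } };
 Parameter p = new Parameter();
 p.KernelType = KernelType.RBF;
 p.Gamma = 0.5;
 double k = Kernel.KernelFunction(x, y, p);   // exp(-0.5 * 2) = exp(-1) ~ 0.368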
 /// <summary>
 /// Evaluates cross validation for every untried (C, Gamma) pair and keeps track of the best combination found so far.
 /// </summary>
 private void DoSearch(
     Problem problem,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     StreamWriter output,
     int nrfold,
     ref double C,
     ref double Gamma,
     ref double crossValidation)
 {
     for (int i = 0; i < CValues.Count; i++)
     {
         for (int j = 0; j < GammaValues.Count; j++)
         {
              // Skip pairs whose C and Gamma values have both been evaluated in an earlier pass
              if (!_Cs.Contains(CValues[i]) || !_Gammas.Contains(GammaValues[j]))
             {
                 parameters.C = CValues[i];
                 parameters.Gamma = GammaValues[j];
                 double test = Training.PerformCrossValidation(problem, parameters, nrfold);
                 Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                 if (output != null)
                     output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                 if (test > crossValidation)
                 {
                     C = parameters.C;
                     Gamma = parameters.Gamma;
                     crossValidation = test;
                     Console.WriteLine(" New Maximum!");
                 }
                 else Console.WriteLine();
                 _Gammas.Add(GammaValues[j]);
             }
             _crossIterations++;
             OnEndEpoch();
         }
         _Cs.Add(CValues[i]);
     }
 }
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  Use this method if validation data isn't available, as it will
 /// divide the training data and train on a portion of it and test on the rest.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The set of C values to use</param>
 /// <param name="GammaValues">The set of Gamma values to use</param>
 /// <param name="outputFile">Output file for the parameter results.</param>
 /// <param name="nrfold">The number of times the data should be divided for validation</param>
 /// <param name="C">The optimal C value will be placed in this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
 public void Grid(
     Problem problem,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     int nrfold,
     out double C,
     out double Gamma)
 {
     C = 0;
     Gamma = 0;
     double crossValidation = double.MinValue;
     StreamWriter output = null;
     if (outputFile != null)
         output = new StreamWriter(outputFile);
     _Cs = new List<double>();
     _Gammas = new List<double>();
     _cycles = CValues.Count * GammaValues.Count + (int)Math.Pow(FINE_STEP_WIDTH * 2 + 1, 2);
     _crossIterations = 0;
     DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);
     // Fine search: rebuild the C and Gamma value lists as a window in log2 space around the best coarse-search result
     double dblCurrentCPow = Math.Log(C, 2);
     double dblCurrentGammaPow = Math.Log(Gamma, 2);
     CValues = GetList(dblCurrentCPow - C_FINE_STEP * FINE_STEP_WIDTH, dblCurrentCPow + C_FINE_STEP * FINE_STEP_WIDTH, C_FINE_STEP);
     GammaValues = GetList(dblCurrentGammaPow - G_FINE_STEP * FINE_STEP_WIDTH, dblCurrentGammaPow + G_FINE_STEP * FINE_STEP_WIDTH, G_FINE_STEP);
     DoSearch(problem, parameters, CValues, GammaValues, output, nrfold, ref C, ref Gamma, ref crossValidation);
     if (output != null)
         output.Close();
 }
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  Use this method if there is no validation data available; it will
 /// divide the training data into NFOLD parts for cross validation (training on all but one part and validating on the held-out part, NFOLD times).
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The set of C values to use</param>
 /// <param name="GammaValues">The set of Gamma values to use</param>
 /// <param name="outputFile">Output file for the parameter results.</param>
 /// <param name="C">The optimal C value will be put into this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
 public void Grid(
     Problem problem,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     out double C,
     out double Gamma)
 {
     Grid(problem, parameters, CValues, GammaValues, outputFile, NFOLD, out C, out Gamma);
 }
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  The default ranges of C and Gamma values are used.  Use this method if there is no validation data available; it will
 /// divide the training data into NFOLD parts for cross validation (training on all but one part and validating on the held-out part, NFOLD times).
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="outputFile">Output file for the parameter results.</param>
 /// <param name="C">The optimal C value will be put into this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
 public void Grid(
     Problem problem,
     Parameter parameters,
     string outputFile,
     out double C,
     out double Gamma)
 {
     Grid(problem, parameters, GetList(MIN_C, MAX_C, C_STEP), GetList(MIN_G, MAX_G, G_STEP), outputFile, NFOLD, out C, out Gamma);
 }
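Outside a GUI the same overload can be driven directly. A minimal console-style sketch with hypothetical paths; Example #11 below wires the identical call to a WinForms progress bar via EndEpochEvent.

 Problem problem = Problem.Read("train.txt");
 Parameter parameters = new Parameter();
 ParameterSelection selection = new ParameterSelection();
 double C, Gamma;
 selection.Grid(problem, parameters, "grid.txt", out C, out Gamma);
 parameters.C = C;
 parameters.Gamma = Gamma;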
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="validation">The validation data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The C values to use</param>
 /// <param name="GammaValues">The Gamma values to use</param>
 /// <param name="outputFile">The output file for the parameter results</param>
 /// <param name="C">The optimal C value will be placed in this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
 public static void Grid(
     Problem problem,
     Problem validation,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     out double C,
     out double Gamma)
 {
     C = 0;
     Gamma = 0;
     double maxScore = double.MinValue;
     StreamWriter output = null;
     if (outputFile != null)
         output = new StreamWriter(outputFile);
     for (int i = 0; i < CValues.Count; i++)
         for (int j = 0; j < GammaValues.Count; j++)
         {
             parameters.C = CValues[i];
             parameters.Gamma = GammaValues[j];
             Model model = Training.Train(problem, parameters);
             double test = Prediction.Predict(validation, "tmp.txt", model, false);
             Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
             if (output != null)
                 output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
             if (test > maxScore)
             {
                 C = parameters.C;
                 Gamma = parameters.Gamma;
                 maxScore = test;
                 Console.WriteLine(" New Maximum!");
             }
             else Console.WriteLine();
         }
     if (output != null)
         output.Close();
 }
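When a separate validation set exists, this static overload replaces cross validation. A minimal sketch with hypothetical paths and small hand-picked value lists, assuming the method lives on the ParameterSelection class shown in Example #11.

 Problem train = Problem.Read("train.txt");
 Problem validation = Problem.Read("validation.txt");
 Parameter parameters = new Parameter();
 List<double> CValues = new List<double>(new double[] { 0.5, 1, 2, 4, 8 });
 List<double> GammaValues = new List<double>(new double[] { 0.125, 0.25, 0.5, 1 });
 double C, Gamma;
 ParameterSelection.Grid(train, validation, parameters, CValues, GammaValues, "grid.txt", out C, out Gamma);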
Example #11
        /// <summary>
        /// Training routine for the SVM.
        /// </summary>
        private void TrainSVM(bool isBatchMode)
        {
            string strTrainFile = null;
            if (isBatchMode)
            {
                strTrainFile = _trainFilePath;
            }
            else
            {
                strTrainFile = tbxTrainFilePath.Text;
            }

            int iPos = strTrainFile.LastIndexOf('_');
            string strMutualPath = strTrainFile.Remove(iPos + 1);
            string strModelFile = strMutualPath + "model.txt";
            Problem prob = Problem.Read(strTrainFile);
            Parameter param = new Parameter();

            if (cmbModelSelection.SelectedItem.ToString() == "Grid search")
            {
                string strLogFile = strMutualPath + "Grid.txt";
                double dblC;
                double dblGamma;
                ParameterSelection paramSel = new ParameterSelection();
                paramSel.NFOLD = Int32.Parse(tbxNumFold.Text);
                paramSel.EndEpochEvent += new CrossEpochEventHandler(
                    delegate(object senderNetwork, CrossEpochEventArgs args)
                    {
                        tlsProgressBar.Value = (int)(args.TrainingIteration * 100d / args.Cycles);
                        tlsStatus.Text = "Current parameter set: " + args.TrainingIteration;
                        Application.DoEvents();
                    });
                paramSel.Grid(prob, param, strLogFile, out dblC, out dblGamma);
                param.C = dblC;
                param.Gamma = dblGamma;
                param.Probability = ckbProbEstimate.Checked;
                Model model = Training.Train(prob, param);
                Model.Write(strModelFile, model);
                tlsProgressBar.Value = 0;
            }
            else if (cmbModelSelection.SelectedItem.ToString() == "Use default values")
            {
                if (tbxC.Text == "" || tbxGamma.Text == "")
                {
                    MessageBox.Show("Please fill in parameters!");
                    return;
                }
                param.C = double.Parse(tbxC.Text);
                param.Gamma = double.Parse(tbxGamma.Text);
                param.Probability = ckbProbEstimate.Checked;
                Model model = Training.Train(prob, param);
                Model.Write(strModelFile, model);
            }
        }
Example #12
        /// <summary>
        /// Training routine for K-SVMeans.
        /// </summary>
        private void TrainKSVMeans(bool isBatchMode)
        {
            string strTrainFile = null;

            if (isBatchMode)
            {
                strTrainFile = _trainFilePath;
            }
            else
            {
                strTrainFile = tbxTrainFilePath.Text;
            }

            int iNumCluster = (int)nmNumCluster.Value;
            int iPos = strTrainFile.LastIndexOf('_');
            string strMutualPath = strTrainFile.Remove(iPos + 1);
            string strClusterModelFile = strMutualPath + "_clusterModel.txt";
            string[] strClusterResultFiles = new string[iNumCluster];
            string[] strSVMModelFiles = new string[iNumCluster];

            for (int i = 0; i < iNumCluster; i++)
            {
                strClusterResultFiles[i] = strMutualPath + "cluster" + (i + 1).ToString() + ".txt";
                strSVMModelFiles[i] = strMutualPath + "model" + (i + 1).ToString() + ".txt";
            }
            // Run the clustering step
            SampleDataBUS samDataBUS = new SampleDataBUS();
            samDataBUS.Read(strTrainFile);
            Clustering clustering = new Clustering(iNumCluster, samDataBUS.Samples, DistanceType.Manhattan);
            clustering.Run(strClusterModelFile, false);
            samDataBUS.WriteIntoCluster(strClusterResultFiles, clustering.SampleData.ClusterIndices);
            // Train one SVM per cluster
            int iProgressBaseline = 0;
            for (int i = 0; i < iNumCluster; i++)
            {
                Problem prob = Problem.Read(strClusterResultFiles[i]);
                Parameter param = new Parameter();
                iProgressBaseline = i * 100 / iNumCluster;
                if (cmbModelSelection.SelectedItem.ToString() == "Grid search")
                {
                    string strLogFile = strMutualPath + "GridCluster" + (i + 1).ToString() + ".txt";
                    double dblC;
                    double dblGamma;
                    ParameterSelection paramSel = new ParameterSelection();
                    paramSel.NFOLD = Int32.Parse(tbxNumFold.Text);
                    paramSel.EndEpochEvent += new CrossEpochEventHandler(
                    delegate(object senderNetwork, CrossEpochEventArgs args)
                    {
                        tlsProgressBar.Value = iProgressBaseline + (int)(args.TrainingIteration * 100d / (args.Cycles * iNumCluster));
                        tlsStatus.Text = "Cluster: " + (i + 1) + " | Current parameter set: " + args.TrainingIteration;
                        Application.DoEvents();
                    });
                    paramSel.Grid(prob, param, strLogFile, out dblC, out dblGamma);
                    param.C = dblC;
                    param.Gamma = dblGamma;
                    param.Probability = ckbProbEstimate.Checked;
                    Model model = Training.Train(prob, param);
                    Model.Write(strSVMModelFiles[i], model);
                }
                else if (cmbModelSelection.SelectedItem.ToString() == "Use default values")
                {
                    if (tbxC.Text == "" || tbxGamma.Text == "")
                    {
                        MessageBox.Show("Please fill in parameters!");
                        return;
                    }
                    param.C = double.Parse(tbxC.Text);
                    param.Gamma = double.Parse(tbxGamma.Text);
                    param.Probability = ckbProbEstimate.Checked;
                    Model model = Training.Train(prob, param);
                    Model.Write(strSVMModelFiles[i], model);
                }

            }
            tlsProgressBar.Value = 0;
        }
Example #13
        /// <summary>
        /// Reads a Model from the provided stream.
        /// </summary>
        /// <param name="stream">The stream from which to read the Model.</param>
        /// <returns>the Model</returns>
        public static Model Read(Stream stream)
        {
            TemporaryCulture.Start();

            StreamReader input = new StreamReader(stream);

            // read parameters

            Model model = new Model();
            Parameter param = new Parameter();
            model.Parameter = param;
            model.Rho = null;
            model.PairwiseProbabilityA = null;
            model.PairwiseProbabilityB = null;
            model.ClassLabels = null;
            model.NumberOfSVPerClass = null;

            bool headerFinished = false;
            while (!headerFinished)
            {
                string line = input.ReadLine();
                string cmd, arg;
                int splitIndex = line.IndexOf(' ');
                if (splitIndex >= 0)
                {
                    cmd = line.Substring(0, splitIndex);
                    arg = line.Substring(splitIndex + 1);
                }
                else
                {
                    cmd = line;
                    arg = "";
                }
                arg = arg.ToLower();

                int i, n;
                switch (cmd)
                {
                    case "svm_type":
                        param.SvmType = (SvmType)Enum.Parse(typeof(SvmType), arg.ToUpper());
                        break;

                    case "kernel_type":
                        param.KernelType = (KernelType)Enum.Parse(typeof(KernelType), arg.ToUpper());
                        break;

                    case "degree":
                        param.Degree = int.Parse(arg);
                        break;

                    case "gamma":
                        param.Gamma = double.Parse(arg);
                        break;

                    case "coef0":
                        param.Coefficient0 = double.Parse(arg);
                        break;

                    case "nr_class":
                        model.NumberOfClasses = int.Parse(arg);
                        break;

                    case "total_sv":
                        model.SupportVectorCount = int.Parse(arg);
                        break;

                    case "rho":
                        n = model.NumberOfClasses * (model.NumberOfClasses - 1) / 2;
                        model.Rho = new double[n];
                        string[] rhoParts = arg.Split();
                        for (i = 0; i < n; i++)
                            model.Rho[i] = double.Parse(rhoParts[i]);
                        break;

                    case "label":
                        n = model.NumberOfClasses;
                        model.ClassLabels = new int[n];
                        string[] labelParts = arg.Split();
                        for (i = 0; i < n; i++)
                            model.ClassLabels[i] = int.Parse(labelParts[i]);
                        break;

                    case "probA":
                        n = model.NumberOfClasses * (model.NumberOfClasses - 1) / 2;
                        model.PairwiseProbabilityA = new double[n];
                        string[] probAParts = arg.Split();
                        for (i = 0; i < n; i++)
                            model.PairwiseProbabilityA[i] = double.Parse(probAParts[i]);
                        break;

                    case "probB":
                        n = model.NumberOfClasses * (model.NumberOfClasses - 1) / 2;
                        model.PairwiseProbabilityB = new double[n];
                        string[] probBParts = arg.Split();
                        for (i = 0; i < n; i++)
                            model.PairwiseProbabilityB[i] = double.Parse(probBParts[i]);
                        break;

                    case "nr_sv":
                        n = model.NumberOfClasses;
                        model.NumberOfSVPerClass = new int[n];
                        string[] nrsvParts = arg.Split();
                        for (i = 0; i < n; i++)
                            model.NumberOfSVPerClass[i] = int.Parse(nrsvParts[i]);
                        break;

                    case "SV":
                        headerFinished = true;
                        break;

                    default:
                        throw new Exception("Unknown text in model file");
                }
            }

            // read sv_coef and SV

            int m = model.NumberOfClasses - 1;
            int l = model.SupportVectorCount;
            model.SupportVectorCoefficients = new double[m][];
            for (int i = 0; i < m; i++)
            {
                model.SupportVectorCoefficients[i] = new double[l];
            }
            model.SupportVectors = new Node[l][];

            for (int i = 0; i < l; i++)
            {
                string[] parts = input.ReadLine().Trim().Split();

                for (int k = 0; k < m; k++)
                    model.SupportVectorCoefficients[k][i] = double.Parse(parts[k]);
                int n = parts.Length - m;
                model.SupportVectors[i] = new Node[n];
                for (int j = 0; j < n; j++)
                {
                    string[] nodeParts = parts[m + j].Split(':');
                    model.SupportVectors[i][j] = new Node();
                    model.SupportVectors[i][j].Index = int.Parse(nodeParts[0]);
                    model.SupportVectors[i][j].Value = double.Parse(nodeParts[1]);
                }
            }

            TemporaryCulture.Stop();

            return model;
        }
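A minimal loading sketch for this Stream overload; the file name is hypothetical and assumes a model previously saved with Model.Write as in the training examples.

        Model model;
        using (FileStream stream = File.OpenRead("model.txt"))
        {
            model = Model.Read(stream);
        }
        Console.WriteLine("{0} classes, {1} support vectors", model.NumberOfClasses, model.SupportVectorCount);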
Example #14
        /// <summary>
        /// Trains a model using the provided training data and parameters.
        /// </summary>
        /// <param name="problem">The training data</param>
        /// <param name="parameters">The parameters to use</param>
        /// <returns>A trained SVM Model</returns>
        public static Model Train(Problem problem, Parameter parameters)
        {
            string error = Procedures.svm_check_parameter(problem, parameters);

            if (error == null)
                return Procedures.svm_train(problem, parameters);
            else throw new Exception(error);
        }
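A minimal end-to-end sketch combining calls that already appear in these examples: read, train, save, then score a held-out file with Prediction.Predict. All paths are hypothetical and the C/Gamma values are arbitrary.

        Problem train = Problem.Read("train.txt");
        Problem test = Problem.Read("test.txt");
        Parameter parameters = new Parameter();
        parameters.C = 2;
        parameters.Gamma = 0.25;
        Model model = Training.Train(train, parameters);
        Model.Write("train.model", model);
        double score = Prediction.Predict(test, "predictions.txt", model, false);
        Console.WriteLine("Held-out score: {0}", score);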
Example #15
        /// <summary>
        /// Parses svm-train style command-line options into a Parameter, then reads the training Problem and determines the model filename.
        /// </summary>
        private static void parseCommandLine(string[] args, out Parameter parameters, out Problem problem, out bool crossValidation, out int nrfold, out string modelFilename)
        {
            int i;

            parameters = new Parameter();
            // default values

            crossValidation = false;
            nrfold = 0;

            // parse options
            for (i = 0; i < args.Length; i++)
            {
                if (args[i][0] != '-')
                    break;
                ++i;
                switch (args[i - 1][1])
                {

                    case 's':
                        parameters.SvmType = (SvmType)int.Parse(args[i]);
                        break;

                    case 't':
                        parameters.KernelType = (KernelType)int.Parse(args[i]);
                        break;

                    case 'd':
                        parameters.Degree = int.Parse(args[i]);
                        break;

                    case 'g':
                        parameters.Gamma = double.Parse(args[i]);
                        break;

                    case 'r':
                        parameters.Coefficient0 = double.Parse(args[i]);
                        break;

                    case 'n':
                        parameters.Nu = double.Parse(args[i]);
                        break;

                    case 'm':
                        parameters.CacheSize = double.Parse(args[i]);
                        break;

                    case 'c':
                        parameters.C = double.Parse(args[i]);
                        break;

                    case 'e':
                        parameters.EPS = double.Parse(args[i]);
                        break;

                    case 'p':
                        parameters.P = double.Parse(args[i]);
                        break;

                    case 'h':
                        parameters.Shrinking = int.Parse(args[i]) == 1;
                        break;

                    case 'b':
                        parameters.Probability = int.Parse(args[i]) == 1;
                        break;

                    case 'v':
                        crossValidation = true;
                        nrfold = int.Parse(args[i]);
                        if (nrfold < 2)
                        {
                            throw new ArgumentException("n-fold cross validation: n must be >= 2");
                        }
                        break;

                    case 'w':
                        parameters.Weights[int.Parse(args[i - 1].Substring(2))] = double.Parse(args[i]);
                        break;

                    default:
                        throw new ArgumentException("Unknown Parameter");
                }
            }

            // determine filenames

            if (i >= args.Length)
                throw new ArgumentException("No input file specified");

            problem = Problem.Read(args[i]);

            if (parameters.Gamma == 0)
                parameters.Gamma = 1.0 / problem.MaxIndex;

            if (i < args.Length - 1)
                modelFilename = args[i + 1];
            else
            {
                int p = args[i].LastIndexOf('/') + 1;
                modelFilename = args[i].Substring(p) + ".model";
            }
        }
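The option letters mirror the classic svm-train tool. Since parseCommandLine is private, the argument vector below is illustrative only, and the numeric enum values (0 for the SVM type, 2 for the RBF kernel) are an assumption about the SvmType/KernelType numbering.

        // -s: SvmType, -t: KernelType, -c: C, -g: Gamma; the last two tokens are the data file and the model file
        string[] args = new string[] { "-s", "0", "-t", "2", "-c", "8", "-g", "0.125", "train.txt", "train.model" };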
Example #16
 /// <summary>
 /// Runs n-fold cross validation and returns the accuracy for classification, or the Pearson
 /// correlation between the predicted and actual targets for regression (EPSILON_SVR / NU_SVR).
 /// </summary>
 private static double doCrossValidation(Problem problem, Parameter parameters, int nr_fold)
 {
     int i;
     double[] target = new double[problem.Count];
     Procedures.svm_cross_validation(problem, parameters, nr_fold, target);
     int total_correct = 0;
     double total_error = 0;
     double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
     if (parameters.SvmType == SvmType.EPSILON_SVR || parameters.SvmType == SvmType.NU_SVR)
     {
         for (i = 0; i < problem.Count; i++)
         {
             double y = problem.Y[i];
             double v = target[i];
             total_error += (v - y) * (v - y);
             sumv += v;
             sumy += y;
             sumvv += v * v;
             sumyy += y * y;
             sumvy += v * y;
         }
         return (problem.Count * sumvy - sumv * sumy) / (Math.Sqrt(problem.Count * sumvv - sumv * sumv) * Math.Sqrt(problem.Count * sumyy - sumy * sumy));
     }
     else
         for (i = 0; i < problem.Count; i++)
             if (target[i] == problem.Y[i])
                 ++total_correct;
     return (double)total_correct / problem.Count;
 }