Example #1
 /// <summary>
 /// Performs cross validation.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to test</param>
 /// <param name="nrfold">The number of cross validations to use</param>
 /// <returns>The cross validation score</returns>
 public static double PerformCrossValidation(Problem problem, Parameter parameters, int nrfold)
 {
     string error = Procedures.svm_check_parameter(problem, parameters);
     if (error == null)
         return doCrossValidation(problem, parameters, nrfold);
     else throw new Exception(error);
 }
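For reference, a minimal call-site sketch (hypothetical data; it assumes the Problem(count, y, x, maxIndex) constructor used in the later examples, 1-based Node indices, and that this method sits on the same Training class whose Train method Examples #4 and #7 call):

    // Sketch: 2-fold cross validation on a tiny hand-built problem.
    Node[][] x = new Node[4][];
    x[0] = new Node[] { new Node(1, 0.0) };
    x[1] = new Node[] { new Node(1, 0.2) };
    x[2] = new Node[] { new Node(1, 0.8) };
    x[3] = new Node[] { new Node(1, 1.0) };
    double[] y = new double[] { -1, -1, +1, +1 };
    Problem problem = new Problem(4, y, x, 1);

    Parameter parameters = new Parameter();
    parameters.KernelType = KernelType.RBF;
    parameters.C = 2;
    parameters.Gamma = .5;

    double score = Training.PerformCrossValidation(problem, parameters, 2);
    Console.WriteLine("CV score: " + score);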
Example #2
        /// <summary>
        /// Determines the Gaussian transform for the provided problem.
        /// </summary>
        /// <param name="prob">The Problem to analyze</param>
        /// <returns>The Gaussian transform for the problem</returns>
        public static GaussianTransform Compute(Problem prob)
        {
            int[] counts = new int[prob.MaxIndex];
            double[] means = new double[prob.MaxIndex];
            foreach (Node[] sample in prob.X)
            {
                for (int i = 0; i < sample.Length; i++)
                {
                    means[sample[i].Index - 1] += sample[i].Value;
                    counts[sample[i].Index - 1]++;
                }
            }
            for (int i = 0; i < prob.MaxIndex; i++)
            {
                if (counts[i] == 0)
                    counts[i] = 2; //unseen feature: treat as two zero-valued samples so the mean stays 0 and (counts[i] - 1) below is nonzero
                means[i] /= counts[i];
            }

            double[] stddevs = new double[prob.MaxIndex];
            foreach (Node[] sample in prob.X)
            {
                for (int i = 0; i < sample.Length; i++)
                {
                    double diff = sample[i].Value - means[sample[i].Index - 1];
                    stddevs[sample[i].Index - 1] += diff * diff;
                }
            }
            for (int i = 0; i < prob.MaxIndex; i++)
            {
                if (stddevs[i] == 0)
                    continue; //constant feature: leave the standard deviation at 0
                stddevs[i] /= (counts[i] - 1); //sample (Bessel-corrected) variance
                stddevs[i] = Math.Sqrt(stddevs[i]);
            }

            return new GaussianTransform(means, stddevs);
        }
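A plausible way to use the result, assuming GaussianTransform implements the IRangeTransform interface consumed by the Scale extension in Example #3 (trainProblem and testProblem are hypothetical, already-loaded Problems):

    // Sketch: fit the transform on training data, then apply it to both sets.
    GaussianTransform gauss = GaussianTransform.Compute(trainProblem);
    Problem scaledTrain = gauss.Scale(trainProblem); // extension method from Example #3
    Problem scaledTest  = gauss.Scale(testProblem);  // reuse the training statistics on test data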
Example #3
 /// <summary>
 /// Scales a problem using the provided range transform. The input problem is not modified.
 /// </summary>
 /// <param name="prob">The problem to scale</param>
 /// <param name="range">The Range transform to use in scaling</param>
 /// <returns>The Scaled problem</returns>
 public static Problem Scale(this IRangeTransform range, Problem prob)
 {
     Problem scaledProblem = new Problem(prob.Count, new double[prob.Count], new Node[prob.Count][], prob.MaxIndex);
     for (int i = 0; i < scaledProblem.Count; i++)
     {
         scaledProblem.X[i] = new Node[prob.X[i].Length];
         for (int j = 0; j < scaledProblem.X[i].Length; j++)
             scaledProblem.X[i][j] = new Node(prob.X[i][j].Index, range.Transform(prob.X[i][j].Value, prob.X[i][j].Index));
         scaledProblem.Y[i] = prob.Y[i];
     }
     return scaledProblem;
 }
Example #4
        public Model train(Problem issue)
        {
            var span = Overseer.observe("Training.Parameter-Choosing");
            Parameter parameters = new Parameter();
            parameters.KernelType = KernelType.RBF;
            double C;
            double Gamma;

            ParameterSelection.Grid(issue, parameters, null, out C, out Gamma);
            parameters.C = C;
            parameters.Gamma = Gamma;
            span.die();
            span = Overseer.observe("Training.Training");
            var result = Training.Train(issue, parameters);
            span.die();
            return result;
        }
Example #5
File: Solver.cs Project: wendelad/RecSys
        // Cross-validation decision values for probability estimates
        private static void svm_binary_svc_probability(Problem prob, Parameter param, double Cp, double Cn, double[] probAB)
        {
            int i;
            int nr_fold = 5;
            int[] perm = new int[prob.Count];
            double[] dec_values = new double[prob.Count];

            // random shuffle
            Random rand = new Random();
            for (i = 0; i < prob.Count; i++) perm[i] = i;
            for (i = 0; i < prob.Count; i++)
            {
                int j = i + (int)(rand.NextDouble() * (prob.Count - i));
                int temp = perm[i]; perm[i] = perm[j]; perm[j] = temp;
            }
            for (i = 0; i < nr_fold; i++)
            {
                int begin = i * prob.Count / nr_fold;
                int end = (i + 1) * prob.Count / nr_fold;
                int j, k;
                Problem subprob = new Problem();

                subprob.Count = prob.Count - (end - begin);
                subprob.X = new Node[subprob.Count][];
                subprob.Y = new double[subprob.Count];

                k = 0;
                for (j = 0; j < begin; j++)
                {
                    subprob.X[k] = prob.X[perm[j]];
                    subprob.Y[k] = prob.Y[perm[j]];
                    ++k;
                }
                for (j = end; j < prob.Count; j++)
                {
                    subprob.X[k] = prob.X[perm[j]];
                    subprob.Y[k] = prob.Y[perm[j]];
                    ++k;
                }
                int p_count = 0, n_count = 0;
                for (j = 0; j < k; j++)
                    if (subprob.Y[j] > 0)
                        p_count++;
                    else
                        n_count++;

                if (p_count == 0 && n_count == 0)
                    for (j = begin; j < end; j++)
                        dec_values[perm[j]] = 0;
                else if (p_count > 0 && n_count == 0)
                    for (j = begin; j < end; j++)
                        dec_values[perm[j]] = 1;
                else if (p_count == 0 && n_count > 0)
                    for (j = begin; j < end; j++)
                        dec_values[perm[j]] = -1;
                else
                {
                    Parameter subparam = (Parameter)param.Clone();
                    subparam.Probability = false;
                    subparam.C = 1.0;
                    subparam.Weights[1] = Cp;
                    subparam.Weights[-1] = Cn;
                    Model submodel = svm_train(subprob, subparam);
                    for (j = begin; j < end; j++)
                    {
                        double[] dec_value = new double[1];
                        svm_predict_values(submodel, prob.X[perm[j]], dec_value);
                        dec_values[perm[j]] = dec_value[0];
                        // ensure +1 -1 order; reason not using CV subroutine
                        dec_values[perm[j]] *= submodel.ClassLabels[0];
                    }
                }
            }
            sigmoid_train(prob.Count, dec_values, prob.Y, probAB);
        }
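Note how the fold boundaries come from integer division, so any remainder is spread across the folds rather than dropped; a quick standalone check with hypothetical sizes:

    // For count = 7 and nr_fold = 5 this prints folds [0,1) [1,2) [2,4) [4,5) [5,7).
    int count = 7, nr_fold = 5;
    for (int i = 0; i < nr_fold; i++)
    {
        int begin = i * count / nr_fold;
        int end = (i + 1) * count / nr_fold;
        Console.WriteLine("fold {0}: [{1}, {2})", i, begin, end);
    }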
Example #6
        /// <summary>
        /// Predicts the class memberships of all the vectors in the problem.
        /// </summary>
        /// <param name="problem">The SVM Problem to solve</param>
        /// <param name="outputFile">File for result output</param>
        /// <param name="model">The Model to use</param>
        /// <param name="predict_probability">Whether to output a distribution over the classes</param>
        /// <returns>Percentage correctly labelled</returns>
        public static double Predict(
            Problem problem,
            string outputFile,
            Model model,
            bool predict_probability)
        {
            int correct = 0;
            int total = 0;
            double error = 0;
            double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
            StreamWriter output = outputFile != null ? new StreamWriter(outputFile) : null;

            SvmType svm_type = Procedures.svm_get_svm_type(model);
            int nr_class = Procedures.svm_get_nr_class(model);
            int[] labels = new int[nr_class];
            double[] prob_estimates = null;

            if (predict_probability)
            {
                if (svm_type == SvmType.EPSILON_SVR || svm_type == SvmType.NU_SVR)
                {
                    Console.WriteLine("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=" + Procedures.svm_get_svr_probability(model));
                }
                else
                {
                    Procedures.svm_get_labels(model, labels);
                    prob_estimates = new double[nr_class];
                    if (output != null)
                    {
                        output.Write("labels");
                        for (int j = 0; j < nr_class; j++)
                        {
                            output.Write(" " + labels[j]);
                        }
                        output.Write("\n");
                    }
                }
            }
            for (int i = 0; i < problem.Count; i++)
            {
                double target = problem.Y[i];
                Node[] x = problem.X[i];

                double v;
                if (predict_probability && (svm_type == SvmType.C_SVC || svm_type == SvmType.NU_SVC))
                {
                    v = Procedures.svm_predict_probability(model, x, prob_estimates);
                    if (output != null)
                    {
                        output.Write(v + " ");
                        for (int j = 0; j < nr_class; j++)
                        {
                            output.Write(prob_estimates[j] + " ");
                        }
                        output.Write("\n");
                    }
                }
                else
                {
                    v = Procedures.svm_predict(model, x);
                    if(output != null)
                        output.Write(v + "\n");
                }

                if (v == target)
                    ++correct;
                error += (v - target) * (v - target);
                sumv += v;
                sumy += target;
                sumvv += v * v;
                sumyy += target * target;
                sumvy += v * target;
                ++total;
            }
            if(output != null)
                output.Close();
            return (double)correct / total;
        }
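Example #9 below calls this method as Prediction.Predict(...); a minimal sketch (trainProblem, testProblem and parameters are hypothetical, set up as in Examples #4 and #7):

    // Sketch: train, then measure accuracy on held-out data.
    Model model = Training.Train(trainProblem, parameters);
    double accuracy = Prediction.Predict(testProblem, "predictions.txt", model, false);
    Console.WriteLine("Accuracy: {0:P2}", accuracy);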
Example #7
        public void startSurfTrain()
        {
            List<FileInfo> trainingFiles = new List<FileInfo>(1000);
            DirectoryInfo di = new DirectoryInfo(Constants.base_folder + "train_" + Constants.CIRCLE_TRIANGLE);
            DirectoryInfo[] dirs = di.GetDirectories("*");
            foreach (DirectoryInfo dir in dirs)
            {
                int i = 0;
                FileInfo[] files = dir.GetFiles("*.bmp");
                foreach (FileInfo fi in files)
                {
                    trainingFiles.Add(fi);
                    if (i++ > Constants.MAX_TRAIN_SAMPLE)
                        break;
                }
            }

            double[] class_labels = new double[trainingFiles.Count];
            Node[][] nodes = new Node[trainingFiles.Count][];

            for (int i = 0; i < trainingFiles.Count; i++)
            {
                Bitmap bmp = (Bitmap)Bitmap.FromFile(trainingFiles[i].FullName, false);

                int com_x_sum = 0, com_y_sum = 0, com_x_y_point_count = 0;
                System.Drawing.Imaging.BitmapData image_data = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), System.Drawing.Imaging.ImageLockMode.ReadWrite, bmp.PixelFormat);
                int bpp = 3; //assumes a 24bpp image (3 bytes per pixel)
                int nOffset = image_data.Stride - bmp.Width * bpp;
                System.IntPtr Scan0 = image_data.Scan0;
                unsafe
                {
                    byte* p = (byte*)Scan0;
                    for (int y = 0; y < Constants.SIGN_HEIGHT; y++)
                    {
                        for (int x = 0; x < Constants.SIGN_WIDTH; x++, p += bpp)
                        {
                            if (p[2] == 0)
                            {
                                com_x_sum += x;
                                com_y_sum += y;
                                com_x_y_point_count++;
                            }
                        }
                        p += nOffset;
                    }
                }
                bmp.UnlockBits(image_data);
                int com_x = com_x_sum / com_x_y_point_count;
                int com_y = com_y_sum / com_x_y_point_count;

                Node[] nds = new Node[NNTrain.numOfinputs];
                nodes[i] = nds;

                bmp.Tag = trainingFiles[i].Name;
                fillFeatures_SURF(bmp, com_x, com_y, nds);
                class_labels[i] = Double.Parse(trainingFiles[i].Directory.Name);
            }
            Problem problem = new Problem(nodes.Length, class_labels, nodes, NNTrain.numOfinputs + 1);
            // RangeTransform range = Scaling.DetermineRange(problem);
            // problem = Scaling.Scale(problem, range);

            Parameter param = new Parameter();
            param.KernelType = KernelType.POLY;
            // param.KernelType = KernelType.LINEAR;
            // param.KernelType = KernelType.RBF;
            param.SvmType = SvmType.NU_SVC;

            param.C = 2;
            param.Gamma = .5;
            //param.KernelType = KernelType.POLY;

            /* double C, Gamma;
            ParameterSelection.Grid(problem, param, Constants.base_folder + "params_" + type + ".txt", out C, out Gamma);
            param.C = C;
            param.Gamma = Gamma;
            //param.Probability = true;
            */
            Model model = Training.Train(problem, param);

            Stream stream = new FileStream(Constants.base_folder + Constants.NN_SVM_SURF + "_" + Constants.CIRCLE_TRIANGLE + ".dat", FileMode.Create, FileAccess.Write, FileShare.None);
            BinaryFormatter b = new BinaryFormatter();
            b.Serialize(stream, model);
            stream.Close();
        }
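The model can presumably be loaded back by mirroring the serialization above; a sketch (note that BinaryFormatter is obsolete and unsafe on modern .NET, so this only mirrors the example's own approach):

    // Sketch: deserialize the model written by startSurfTrain().
    using (Stream stream = new FileStream(Constants.base_folder + Constants.NN_SVM_SURF + "_" + Constants.CIRCLE_TRIANGLE + ".dat", FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        BinaryFormatter b = new BinaryFormatter();
        Model model = (Model)b.Deserialize(stream);
    }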
Example #8
        ///
        public override void Train()
        {
            int num_users = Feedback.UserMatrix.NumberOfRows;   // DH: should be based on MaxUserID for cold case? TODO: investigate.
            int num_items = Feedback.ItemMatrix.NumberOfRows;

            var svm_features = new List<Node[]>();

            Node[][] svm_features_array = svm_features.ToArray();
            var svm_parameters = new Parameter();
            svm_parameters.SvmType = SvmType.EPSILON_SVR;
            //svm_parameters.SvmType = SvmType.NU_SVR;
            svm_parameters.C     = this.c;
            svm_parameters.Gamma = this.gamma;

            // user-wise training
            this.models = new Model[num_users];
            for (int u = 0; u < num_users; u++)
            {
                var targets = new double[num_items];
                for (int i = 0; i < num_items; i++)
                    targets[i] = Feedback.UserMatrix[u, i] ? 1 : 0;

                Problem svm_problem = new Problem(svm_features.Count, targets, svm_features_array, NumItemAttributes - 1); // TODO check
                models[u] = SVM.Training.Train(svm_problem, svm_parameters);
            }
        }
Example #9
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="validation">The validation data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The C values to use</param>
 /// <param name="GammaValues">The Gamma values to use</param>
 /// <param name="outputFile">The output file for the parameter results</param>
 /// <param name="C">The optimal C value will be placed in this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
 public static void Grid(
     Problem problem,
     Problem validation,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     out double C,
     out double Gamma)
 {
     C = 0;
     Gamma = 0;
     double maxScore = double.MinValue;
     StreamWriter output = null;
     if(outputFile != null)
         output = new StreamWriter(outputFile);
     for (int i = 0; i < CValues.Count; i++)
         for (int j = 0; j < GammaValues.Count; j++)
         {
             parameters.C = CValues[i];
             parameters.Gamma = GammaValues[j];
             Model model = Training.Train(problem, parameters);
             double test = Prediction.Predict(validation, "tmp.txt", model, false);
             Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
             if(output != null)
                 output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
             if (test > maxScore)
             {
                 C = parameters.C;
                 Gamma = parameters.Gamma;
                 maxScore = test;
                 Console.WriteLine(" New Maximum!");
             }
             else Console.WriteLine();
         }
     if(output != null)
         output.Close();
 }
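The candidate lists are typically exponentially spaced; a hedged sketch that builds power-of-two grids (the range bounds are hypothetical, and trainProblem, validationProblem and parameters are assumed to exist) and invokes this overload:

    // Sketch: C and Gamma candidates as 2^-5 .. 2^5, then grid search.
    List<double> CValues = new List<double>();
    List<double> GammaValues = new List<double>();
    for (int e = -5; e <= 5; e++)
    {
        CValues.Add(Math.Pow(2, e));
        GammaValues.Add(Math.Pow(2, e));
    }

    double C, Gamma;
    ParameterSelection.Grid(trainProblem, validationProblem, parameters, CValues, GammaValues, null, out C, out Gamma);
    parameters.C = C;
    parameters.Gamma = Gamma;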
Example #10
        public Problem CuckooSearch(Problem prob, out double storagePercentage)
        {
            int    nNests         = 5;    //number of nests, or number of solutions
            int    subsetSize     = 100;
            int    maxGen         = 5;    //maximum generation
            double discoveryRate  = 0.25; //discovery rate of alien eggs
            double tolerance      = Math.Exp(-5);
            int    lowerBound     = -5;
            int    upperBound     = 5;
            int    totalInstances = prob.X.Count(); //problem size

            double[] cuckooFitnessVal                = new double[nNests];
            double[] newCuckooFitnessVal             = new double[nNests];
            ObjectInstanceSelection globalBestCuckoo = null;
            double globalBest = double.MinValue;
            Random rand       = new Random();

            FlowerPollinationAlgorithm fpa = new FlowerPollinationAlgorithm();

            //initialize population
            List <ObjectInstanceSelection> cuckoos    = InitializeBinaryCuckoo(nNests, subsetSize, totalInstances, prob);
            List <ObjectInstanceSelection> newCuckoos = new List <ObjectInstanceSelection>(cuckoos.Count); //create a clone of the cuckoo population

            cuckoos.ForEach((item) =>
            {
                newCuckoos.Add(new ObjectInstanceSelection(item.Attribute_Values, item.Attribute_Values_Continuous, item.Pointers, item.Fitness)); //clone each cuckoo
            });

            cuckooFitnessVal    = EvaluateObjectiveFunction(cuckoos, prob);                                                                //evaluate fitness value for all the cuckoos
            newCuckooFitnessVal = EvaluateObjectiveFunction(newCuckoos, prob);                                                             //evaluate fitness value for the new cuckoos. Note: identical here, since no search step has occurred yet
            CuckooFitness(cuckooFitnessVal, cuckoos);                                                                                      //assign fitness value to each cuckoo
            CuckooFitness(newCuckooFitnessVal, newCuckoos);                                                                                //assign fitness value to each new cuckoo
            globalBestCuckoo = EvaluateSolution(cuckooFitnessVal, newCuckooFitnessVal, globalBest, cuckoos, newCuckoos, globalBestCuckoo); //get the global best cuckoo
            globalBest       = globalBestCuckoo.__Fitness;

            //generate new solutions
            double beta  = 3.0 / 2; //Lévy exponent; 3.0/2 avoids integer division (3 / 2 would evaluate to 1)
            double A     = fp.Gamma(1 + beta) * Math.Sin(Math.PI * (beta / 2));
            double B     = fp.Gamma((1 + beta) / 2) * beta;
            double C     = (beta - 1) / 2;
            double D     = Math.Pow(2, C);
            double E     = A / (B * D);
            double sigma = Math.Pow(E, (1 / beta));

            double F;
            double G;
            double step;
            double stepSize;
            int    x = 0;

            for (int i = 0; i <= maxGen; i++)
            {
                for (int j = 0; j < nNests; j++)
                {
                    for (int k = 0; k < subsetSize; k++)
                    {
                        F    = SimpleRNG.GetNormal() * sigma;
                        G    = SimpleRNG.GetNormal();
                        step = F / Math.Pow(Math.Abs(G), (1 / beta));

                        //In the next equation, the difference factor (s - best) means that when the solution is the best solution, it remains unchanged.
                        //Here the factor 0.01 comes from the fact that L/100 should be the typical step size of walks/flights, where L is the typical length scale;
                        //otherwise, Lévy flights may become too aggressive, making new solutions jump outside the design domain (and thus wasting evaluations).
                        stepSize = 0.01 * step * (cuckoos[j].Attribute_Values[k] - globalBestCuckoo.Attribute_Values[k]);

                        //Now the actual random walks or Lévy flights
                        newCuckoos[j].Attribute_Values[k] = fi.Binarize((newCuckoos[j].Attribute_Values[k] + stepSize) * SimpleRNG.GetNormal(), rand.NextDouble());

                        if (cuckoos[j].Attribute_Values[k] == 1 && newCuckoos[j].Attribute_Values[k] == 0)
                        {
                            x++;
                        }
                    }
                }

                //discovery and randomization - replace some nest by constructing new solutions
                newCuckoos = EmptyNest(cuckoos, newCuckoos, discoveryRate, subsetSize, nNests);

                //Select best solutions from the original population and matured population for the next generation;
                fpa.SelectBestSolution(cuckoos, newCuckoos);

                //evaluate new solution
                newCuckooFitnessVal = EvaluateObjectiveFunction(newCuckoos, prob);                                                             //evaluate fitness value for the new cuckoos
                CuckooFitness(newCuckooFitnessVal, newCuckoos);                                                                                //assign fitness value to each new cuckoo
                globalBestCuckoo = EvaluateSolution(cuckooFitnessVal, newCuckooFitnessVal, globalBest, cuckoos, newCuckoos, globalBestCuckoo); //get the global best cuckoo
                globalBest       = globalBestCuckoo.Fitness;

                //if the solution has converged to an optimal user-defined point, stop the search
                int Max = 60;          // maximum percentage reduction
                if (globalBest >= Max) //if the percentage reduction has reached 60%, stop the search
                {
                    break;
                }
            }

            //ensure that at least min instances are selected for classification
            int min = 40; //minimum number of selected instances

            globalBestCuckoo = fpa.AddInstances(globalBestCuckoo, min);

            Problem subBest = fi.buildModelMultiClass(globalBestCuckoo, prob); //build a model for the best instance mask

            storagePercentage = Training.StoragePercentage(subBest, prob);     //calculate the percentage of the original training set retained by the reduction algorithm
            return(subBest);
        }
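The block that computes sigma above is Mantegna's algorithm for generating Lévy-stable step lengths; factored out as a helper, a sketch that keeps the example's fp.Gamma and SimpleRNG.GetNormal dependencies and applies the 3.0/2 exponent fix:

    // Sketch: Mantegna's algorithm for a Lévy-distributed step with exponent beta.
    double LevyStep(double beta)
    {
        double num   = fp.Gamma(1 + beta) * Math.Sin(Math.PI * beta / 2);
        double den   = fp.Gamma((1 + beta) / 2) * beta * Math.Pow(2, (beta - 1) / 2);
        double sigma = Math.Pow(num / den, 1 / beta);
        double u = SimpleRNG.GetNormal() * sigma; // numerator draw, u ~ N(0, sigma^2)
        double v = SimpleRNG.GetNormal();         // denominator draw, v ~ N(0, 1)
        return u / Math.Pow(Math.Abs(v), 1 / beta);
    }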
Example #11
File: Solver.cs Project: wendelad/RecSys
        private static void solve_c_svc(Problem prob, Parameter param,
                        double[] alpha, Solver.SolutionInfo si,
                        double Cp, double Cn)
        {
            int l = prob.Count;
            double[] Minus_ones = new double[l];
            sbyte[] y = new sbyte[l];

            int i;

            for (i = 0; i < l; i++)
            {
                alpha[i] = 0;
                Minus_ones[i] = -1;
                if (prob.Y[i] > 0) y[i] = +1; else y[i] = -1;
            }

            Solver s = new Solver();
            s.Solve(l, new SVC_Q(prob, param, y), Minus_ones, y,
                alpha, Cp, Cn, param.EPS, si, param.Shrinking);

            double sum_alpha = 0;
            for (i = 0; i < l; i++)
                sum_alpha += alpha[i];

            if (Cp == Cn)
                Procedures.info("nu = " + sum_alpha / (Cp * prob.Count) + "\n");

            for (i = 0; i < l; i++)
                alpha[i] *= y[i];
        }
Example #12
        //compute the k-nearest neighbour of all instances in the dataset
        public Problem computeNearestNeighbour(int k, Problem trainDataset, int numOfSubset)
        {
            double sum = 0;
            double distance;
            int    n    = trainDataset.Count; //number of data instances
            int    subN = 400;
            //int n = 800; //number of data instances
            List <Node[]> nearestNeighbours = new List <Node[]>();
            List <double> dist   = new List <double>();
            List <double> labels = new List <double>();
            List <int>    index  = new List <int>();

            Node[] xNodes = new Node[n];
            Node[] yNodes = new Node[n];
            object[,] obj = new object[subN, 4];
            List <object[, ]> objList = new List <object[, ]>();

            object[,] temp = new object[1, 3];
            List <Problem> ds = new List <Problem>();

            object[,] nn = new object[n, 7];         //data structure containing the NNs and their corresponding distances
            double score = 0;                        //score assigned to each instance by the oppositely-labelled NNs in its neighbourhood list

            object[,] scoreList  = new object[n, 4]; //scores assigned to all the instances
            object[,] dataSubset = new object[n, 3]; //subset of data to return

            //Get the neighbourhood list of all the instances in the dataset. That is, compute distance between Xi and other instances in the dataset.
            List <object[, ]> objSortedList = new List <object[, ]>();

            for (int i = 0; i < n; i++)
            {
                int ctr = 0; int cntr1 = 0; int cntr2 = 0; int a = 0; int b = 0;
                int countP = trainDataset.Y.Count(q => q == 1);
                int countN = trainDataset.Y.Count(q => q == -1);

                //generate unique random number
                //List<int> rNum = Training.GetRandomNumbers(2000, n);

                for (int j = 0; j < n; j++)
                {
                    if (j.Equals(i))
                    {
                        continue;
                    }
                    //randomly select N instances from dataset
                    else if (cntr1 < (subN * 0.5) && trainDataset.Y[j] == 1)                               //compute distance for positive class (50% of k goes for positive instances)
                    {
                        distance    = Kernel.computeSquaredDistance(trainDataset.X[i], trainDataset.X[j]); //compute the distance between Xi and all other instances in the dataset
                        obj[ctr, 0] = distance;
                        obj[ctr, 1] = trainDataset.X[j];
                        obj[ctr, 2] = trainDataset.Y[j];
                        obj[ctr, 3] = ctr; //save the index
                        ctr++;             //save the instance and their corresponding distances
                        cntr1++;
                    }
                    else if (cntr2 < (subN * 0.5) && trainDataset.Y[j] == -1)                              //compute distance for negative class (50% of k goes for negative instances)
                    {
                        distance    = Kernel.computeSquaredDistance(trainDataset.X[i], trainDataset.X[j]); //compute the distance between Xi and all other instances in the dataset
                        obj[ctr, 0] = distance;
                        obj[ctr, 1] = trainDataset.X[j];
                        obj[ctr, 2] = trainDataset.Y[j];
                        obj[ctr, 3] = ctr; //save the index
                        ctr++;             //save the instance and their corresponding distances
                        cntr2++;
                    }

                    //distance = Kernel.computeSquaredDistance(trainDataset.X[i], trainDataset.X[j]); //compute the distance between Xi and all other instances in the dataset
                    ////save the instance and their corresponding distances
                    //obj[a, 0] = distance;
                    //obj[a, 1] = trainDataset.X[j];
                    //obj[a, 2] = trainDataset.Y[j];
                    //obj[a, 3] = a; //save the index
                    //a++;
                }

                objList.Add(obj); //Data structure (or List), containing the instances and distances of K nearest neighbours of every instance in the dataset
                obj = new object[subN, 4];
            }

            //sort the data structure. That is, sort (and retain the index of) the neighbourhood list of each instance in the dataset
            for (int i = 0; i < subN; i++)
            {
                object[,] objSort = sortMultiArray(objList[i]); //sort array to select the nearest neighbour of Xi
                objSortedList.Add(objSort);                     //add to list
            }

            //select boundary instances
            for (int i = 0; i < n; i++)
            {
                //select the k-nearest neighbours (using the top K elements), their corresponding distances and class labels of Xi
                int subK = k;
                int count1 = 0; int count2 = 0;
                for (int p = 0; p < subN; p++)
                {
                    object[,] objSorted = objSortedList[p];
                    if (count1 < (subK / 2) && (double)objSorted[p, 2] == 1) //50% of k goes to positive class. This is to ensure that there is a balance in the training subset
                    {
                        dist.Add((double)objSorted[p, 0]);                   //distance
                        nearestNeighbours.Add((Node[])objSorted[p, 1]);      //nearest neighbour i
                        labels.Add((double)objSorted[p, 2]);                 //class labels
                        index.Add((int)objSorted[p, 3]);                     //add index for each nearest neighbour
                        count1++;
                    }
                    else if (count2 < (subK / 2) && (double)objSorted[p, 2] == -1) //50% of K goes to negative class
                    {
                        dist.Add((double)objSorted[p, 0]);                         //distance
                        nearestNeighbours.Add((Node[])objSorted[p, 1]);            //nearest neighbour i
                        labels.Add((double)objSorted[p, 2]);                       //class labels
                        index.Add((int)objSorted[p, 3]);                           //add index for each nearest neighbour
                        count2++;
                    }
                }

                nn[i, 0] = k;
                nn[i, 1] = dist;
                nn[i, 2] = nearestNeighbours;
                nn[i, 3] = trainDataset.X[i];
                nn[i, 4] = labels;
                nn[i, 5] = trainDataset.Y[i];
                nn[i, 6] = index; //save the index

                //Compute Exponential Decay
                double        EDScore    = 0; //Exponential decay score
                int           counter    = 0;
                double        distNN     = 0;
                List <double> distNNList = new List <double>();
                for (int p = 0; p < subK; p++)
                {
                    //compute exponential decay for Xi and all its Nearest neighbour belonging to the opposite class
                    //if the label of the current instance in the neighbourhood list is not equal to the label of ith instance then compute its Exponential Decay Score
                    if (((List <double>)nn[i, 4])[p] != (double)nn[i, 5]) //identify the nearest neighbour belonging to the opposite class
                    {
                        int indx = ((List <int>)nn[i, 6])[p];             //get the index of the current nearest neighbour
                        object[,] objNN = objSortedList[indx];            //get the current nearest neighbour from list
                        //using the index, select the distance of the closest instance of the opposite class on its neighborhood list

                        for (int a = 0; a < subN; a++)
                        {
                            double label1 = (double)objNN[a, 2];          //label of the current instance in the neighbourhood list of the current nearest neighbour
                            double label2 = ((List <double>)nn[i, 4])[p]; //label of the current instance in the neighbourhood list
                            //if the statement below is true (that is, if the labels are not equal), then select the closest instance of the opposite class on its neighborhood list
                            //List is ordered already, hence the topmost instance of the opposite class in the neighbourhood list, is the closest instance
                            if (label1 != label2)
                            {
                                distNN = (double)objNN[a, 0]; //get the distance and break. We only need the distance of the closest instance.
                                distNNList.Add(distNN);
                                break;
                            }
                        }

                        EDScore += ((List <double>)nn[i, 1])[p] - Math.Pow(distNN, 2); //compute exponential decay score
                        //EDScore += ((List<double>)nn[i, 1])[p] - Math.Pow(((List<double>)nn[i, 1])[p], 2); //compute exponential decay score
                        counter++;
                    }
                }

                EDScore = EDScore / counter;

                //determine the scores of every instance
                int numOfContributors = counter; int b = 0;
                for (int p = 0; p < subK; p++)
                {
                    //if the label of the current instance in the neighbourhood list is not equal to the label of ith instance
                    if (((List <double>)nn[i, 4])[p] != (double)nn[i, 5])//identify the nearest neighbour belonging to the opposite class
                    {
                        //score += Math.Exp(-(((List<double>)nn[i, 1])[p] - Math.Pow(((List<double>)nn[i, 1])[p], 2) / EDScore));
                        score += Math.Exp(-(((List <double>)nn[i, 1])[p] - Math.Pow(distNNList[b++], 2) / EDScore));
                    }
                }
                score           = score / numOfContributors;
                scoreList[i, 0] = score; scoreList[i, 1] = nn[i, 3]; scoreList[i, 2] = nn[i, 5]; scoreList[i, 3] = nn[i, 6];

                dist = new List <double>(); nearestNeighbours = new List <Node[]>(); labels = new List <double>();
            }


            sortMultiArray(scoreList); //sort scores to select the best N instances to be used for training

            //select data subset to be used for training. Selected subset are instances that are closest to the data boundary
            Node[][] xScoreList = new Node[numOfSubset][];
            double[] yScoreList = new double[numOfSubset];
            int      cnt1 = 0, cnt2 = 0, cnt3 = 0;
            int      total = n - 1;

            for (int i = 0; i < n; i++)
            {
                for (int j = 0; j < 3; j++)
                {
                    dataSubset[i, j] = scoreList[total, j];                      //select instances with the highest scores
                }
                if (cnt1 < (0.7 * numOfSubset) && (double)dataSubset[i, 2] == 1) //select 70% positive instances of the subset
                {
                    xScoreList[cnt3] = (Node[])dataSubset[i, 1];
                    yScoreList[cnt3] = (double)dataSubset[i, 2];
                    cnt1++; cnt3++;
                }
                else if (cnt2 < (0.3 * numOfSubset) && (double)dataSubset[i, 2] == -1) //select 30% negative instances of the subset
                {
                    xScoreList[cnt3] = (Node[])dataSubset[i, 1];
                    yScoreList[cnt3] = (double)dataSubset[i, 2];
                    cnt2++; cnt3++;
                }
                total--;
            }
            Problem subset = new Problem(numOfSubset, yScoreList, xScoreList, xScoreList[0].GetLength(0));

            return(subset);
        }
        //build model for multi class problems
        public Problem buildModelMultiClass(ObjectInstanceSelection firefly, Problem prob)
        {
            int           tNI = firefly.Attribute_Values.Count(); //size of each Instance Mask
            List <double> y = new List <double>();
            List <Node[]> x = new List <Node[]>();
            bool          pos = false, neg = false;
            List <double> classes = getClassLabels(prob.Y); //get the class labels
            int           nClass  = classes.Count;          //count the number of classes

            int[] classCount = new int[nClass];
            //building model for each instance in instance mask in each firefly object
            for (int j = 0; j < tNI; j++)
            {
                if (firefly.__Attribute_Values[j] == 1) //if instance is selected, use for classification
                {
                    int p = firefly.__Pointers[j];
                    x.Add(prob.X[p]);
                    y.Add(prob.Y[p]);

                    for (int i = 0; i < nClass; i++)
                    {
                        if (prob.Y[p] == classes[i])
                        {
                            classCount[i]++; //count the total number of instances in each class
                        }
                    }
                }
                else
                {
                    continue;
                }
            }

            Node[][] X = new Node[x.Count][];
            double[] Y = new double[y.Count];

            //ensuring that the subproblem consists of both positive and negative instances
            int k = 0;

            if (classCount.Sum() == 0) //if the sum is zero, then no instance was selected
            {
                return(null);
            }
            else //ensure that the instance mask contains at least one instance of each class
            {
                for (int a = 0; a < nClass; a++)
                {
                    if (classCount[a] == 0)
                    {
                        int m = 0;
                        for (int i = 0; i < prob.Count; i++) //if no instance in this class, search the whole subproblem and insert one instance in the kth position of subproblem
                        {
                            if (prob.Y[i] == classes[a])
                            {
                                x[k] = prob.X[i]; //insert an instance of the missing class
                                y[k] = prob.Y[i]; //insert label
                                k++; m++;
                            }
                            if (m == 2)
                            {
                                break;
                            }
                        }
                    }
                }
            }

            x.CopyTo(X); //convert from list to double[] array
            y.CopyTo(Y);
            Problem subProb = new Problem(X.Count(), Y, X, X[0].GetLength(0));

            return(subProb);
        }
        //compute the k-nearest neighbour of all instances in the dataset
        public Problem computeNearestNeighbour(int k, Problem trainDataset, int numOfSubset)
        {
            double        sum = 0; double distance;
            int           n = trainDataset.Count; //number of data instances
            List <Node[]> nearestNeighbours = new List <Node[]>(); List <double> dist = new List <double>(); List <double> labels = new List <double>();

            Node[] xNodes = new Node[n];
            Node[] yNodes = new Node[n];
            object[,] obj = new object[n - 1, 3];
            //object[,] obj = new object[k, 3];
            object[,] temp = new object[1, 3];
            List <Problem> ds = new List <Problem>();

            object[,] nn = new object[n, 6];         //data structure containing the NNs and their corresponding distances
            double score = 0;                        //score assigned to each instance by the oppositely-labelled NNs in its neighbourhood list

            object[,] scoreList  = new object[n, 3]; //scores assigned to all the instances
            object[,] dataSubset = new object[n, 3]; //subset of data to return

            //compute distance between Xi and other instances
            for (int i = 0; i < n; i++)
            {
                int ctr = 0; int cntr1 = 0; int cntr2 = 0;
                int countP = trainDataset.Y.Count(q => q == 1);
                int countN = trainDataset.Y.Count(q => q == -1);
                for (int j = 0; j < n; j++)
                {
                    if (j.Equals(i))
                    {
                        continue;
                    }
                    if (countN <= 1) //come here if we have very few selected negative instances in the subproblem
                    {
                        double propP = n * 0.9, propN = n * 0.1;
                        obj = buildObject(ref ctr, ref cntr1, ref cntr2, i, j, obj, trainDataset, propP, propN); //0.9 and 0.1 are proportion of positive and negative instances to be selected
                        //ctr++; cntr1++; cntr2++;
                    }
                    else if (countP <= 1) //come here if we have very few selected positive instances
                    {
                        double propP = n * 0.1, propN = n * 0.9;
                        obj = buildObject(ref ctr, ref cntr1, ref cntr2, i, j, obj, trainDataset, propP, propN); //0.1 and 0.9 are proportion of positive and negative instances to be selected
                    }
                    else if (n > trainDataset.Count)                                                             //come here if n is more than the total number of selected instances
                    {
                        double propP = countP, propN = trainDataset.Count - countP;                              //in this case, selected instances consist of all the positive instance and a portion of negative instance
                        obj = buildObject(ref ctr, ref cntr1, ref cntr2, i, j, obj, trainDataset, propP, propN);
                        //ctr++; cntr1++; cntr2++;
                    }
                    else if (countN < (n * 0.7) || countP < (n * 0.3)) //come here if the selected positive or negative instances is less than the defined proportion
                    {
                        if (countP < (n * 0.3))
                        {
                            double propP = countP, propN = n - countP; //in this case, selected instances consist of all the positive instance and a portion of negative instance
                            obj = buildObject(ref ctr, ref cntr1, ref cntr2, i, j, obj, trainDataset, propP, propN);
                        }
                        else if (countN < (n * 0.7))
                        {
                            double propP = n - countN, propN = countN; //in this case, selected instances consist of all the positive instance and a portion of negative instance
                            obj = buildObject(ref ctr, ref cntr1, ref cntr2, i, j, obj, trainDataset, propP, propN);
                        }
                    }
                    else //come here if we have fairly good distribution of positive and negative instances
                    {
                        double propP = n * 0.3, propN = n * 0.7;
                        obj = buildObject(ref ctr, ref cntr1, ref cntr2, i, j, obj, trainDataset, propP, propN); //0.3 and 0.7 are proportion of positive and negative instances to be selected
                    }
                }

                Training.sortMultiArray(obj); //sort array to select the nearest neighbour of Xi

                //select the k-nearest neighbours (using the top K elements), their corresponding distances and class labels of Xi
                //int subK = 30;
                int subK = k;
                int count1 = 0; int count2 = 0; int sumN = 0, sumP = 0;
                for (int z = 0; z < obj.GetLength(0); z++) //count the total number of positive and negative instances in the subproblem
                {
                    if ((double)obj[z, 2] == 1)
                    {
                        sumP++;
                    }
                    else
                    {
                        sumN++;
                    }
                }
                for (int p = 0; p < k; p++)                       //select the k-nearest neighbours (using the top K elements), their corresponding distances and class labels of Xi
                {
                    if (count1 < sumP && (double)obj[p, 2] == 1)  //NN for positive class
                    {
                        dist.Add((double)obj[p, 0]);              //distance
                        nearestNeighbours.Add((Node[])obj[p, 1]); //nearest neighbour i
                        labels.Add((double)obj[p, 2]);            //class labels
                        count1++;
                    }
                    else if (count2 < sumN && (double)obj[p, 2] == -1) // NN for negative class
                    {
                        dist.Add((double)obj[p, 0]);                   //distance
                        nearestNeighbours.Add((Node[])obj[p, 1]);      //nearest neighbour i
                        labels.Add((double)obj[p, 2]);                 //class labels
                        count2++;
                    }
                }

                //for (int z = 0; z < obj.Length; z++)

                nn[i, 0] = k; nn[i, 1] = dist; nn[i, 2] = nearestNeighbours; nn[i, 3] = trainDataset.X[i]; nn[i, 4] = labels; nn[i, 5] = trainDataset.Y[i];

                //Compute Exponential Decay
                double EDScore = 0; //Exponential decay score
                int    counter = 0;
                for (int p = 0; p < subK; p++)
                {
                    //compute exponential decay for Xi and all its Nearest neighbour belonging to the opposite class
                    //if the label of the current instance in the neighbourhood list is not equal to the label of ith instance then compute its Exponential Decay Score
                    if (((List <double>)nn[i, 4])[p] != (double)nn[i, 5])                                    //identify the nearest neighbour belonging to the opposite class
                    {
                        EDScore += ((List <double>)nn[i, 1])[p] - Math.Pow(((List <double>)nn[i, 1])[p], 2); //compute exponential decay score
                        counter++;                                                                           //counting the number of contributors
                    }
                }
                EDScore = EDScore / counter;

                //determine the scores of every instance
                //int numOfContributors = k - counter; //number of NN of opposite class that contributes to Xi
                int numOfContributors = counter;
                for (int p = 0; p < subK; p++)
                {
                    //if the label of the current instance in the neighbourhood list is not equal to the label of ith instance
                    if (((List <double>)nn[i, 4])[p] != (double)nn[i, 5])//identify the nearest neighbour belonging to the opposite class
                    {
                        score += Math.Exp(-(((List <double>)nn[i, 1])[p] - Math.Pow(((List <double>)nn[i, 1])[p], 2) / EDScore));
                    }
                }
                score           = score / numOfContributors;
                scoreList[i, 0] = score; scoreList[i, 1] = nn[i, 3]; scoreList[i, 2] = nn[i, 5];

                dist = new List <double>(); nearestNeighbours = new List <Node[]>(); labels = new List <double>();
                //EDScoreList.Add(EDScore);//list of Exponential Decay scores
                //Problem pp = new Problem(k, dist, nearestNeighbours, trainDataset.X[i], labels);
                //ds.Add(pp);
            }
            Training.sortMultiArray(scoreList); //sort scores to select the best N instances to be used for training

            //select data subset to be used for training. Selected subset are instances that are closest to the data boundary
            Node[][] xScoreList = new Node[numOfSubset][];
            double[] yScoreList = new double[numOfSubset];
            int      cnt1 = 0, cnt2 = 0, cnt3 = 0;
            int      total = n - 1;

            for (int i = 0; i < n; i++)
            {
                for (int j = 0; j < 3; j++)
                {
                    dataSubset[i, j] = scoreList[total, j];                      //select instances with the highest scores
                }
                if (cnt1 < (0.1 * numOfSubset) && (double)dataSubset[i, 2] == 1) //select 10% positive instances of the subset
                {
                    xScoreList[cnt3] = (Node[])dataSubset[i, 1];
                    yScoreList[cnt3] = (double)dataSubset[i, 2];
                    cnt1++; cnt3++;
                }
                else if (cnt2 < (0.9 * numOfSubset) && (double)dataSubset[i, 2] == -1) //select 90% negative instances of the subset
                {
                    xScoreList[cnt3] = (Node[])dataSubset[i, 1];
                    yScoreList[cnt3] = (double)dataSubset[i, 2];
                    cnt2++; cnt3++;
                }
                total--;
            }
            Problem subset = new Problem(numOfSubset, yScoreList, xScoreList, xScoreList[0].GetLength(0));

            return(subset);
        }
        public object[,] buildObject(ref int ctr, ref int cntr1, ref int cntr2, int i, int j, object[,] obj, Problem trainDataset, double propP, double propN)
        {
            //ctr = a; cntr1 = b; cntr2 = c;
            double distance;

            if (cntr1 < propP && trainDataset.Y[j] == 1)                                                  //compute distance for the positive class (propP instances go to the positive class)
            {
                distance    = Kernel.computeSquaredDistance(trainDataset.X[i], trainDataset.X[j]);        //compute the distance between Xi and all other instances in the dataset
                obj[ctr, 0] = distance; obj[ctr, 1] = trainDataset.X[j]; obj[ctr, 2] = trainDataset.Y[j]; //save the instance and their corresponding distances
                ctr++;  cntr1++;
            }
            else if (trainDataset.Y[j] == -1 && cntr2 < propN)                                            //compute distance for the negative class (propN instances go to the negative class)
            {
                distance    = Kernel.computeSquaredDistance(trainDataset.X[i], trainDataset.X[j]);        //compute the distance between Xi and all other instances in the dataset
                obj[ctr, 0] = distance; obj[ctr, 1] = trainDataset.X[j]; obj[ctr, 2] = trainDataset.Y[j]; //save the instance and their corresponding distances
                ctr++; cntr2++;
            }

            return(obj);
        }
        /// <summary>
        /// Move all fireflies toward brighter ones
        /// </summary>
        //public void ffa_move(double[] Lightn, ObjectInstanceSelection[] fireflies0, double[] Lighto, double alpha, double gamma, List<ObjectInstanceSelection> fireflies,
        //                      Problem prob, Parameter param, List<double> avgAcc, List<int> changedIndex)
        public void ffa_move(double[] Lightn, ObjectInstanceSelection[] fireflies0, double[] Lighto, double alpha, double gamma, List <ObjectInstanceSelection> fireflies,
                             Problem prob)
        {
            int    nFF = fireflies.Count; //number of fireflies
            double rC, rG, rF;            //rC -> distance for C value, rG -> distance for Gamma value, rF -> distance for the feature mask
            double beta0;
            double beta;                  //beta -> attractiveness value for C and Gamma

            //specifying the ranges for C and Gamma
            double minC = Math.Pow(2, MIN_C);                       // minimum value for C
            double maxC = Math.Pow(2, MAX_C);                       // maximum value for C
            double minG = Math.Pow(2, MIN_G);                       // minimum value for G
            double maxG = Math.Pow(2, MAX_G);                       // maximum value for G

            int subsetSize = fireflies[0].Attribute_Values.Count(); //size of Instance Mask

            double[] CBackup     = new double[fireflies.Count];     //back-up array for C value
            double[] GammaBackup = new double[fireflies.Count];     //back-up array for Gamma value
            double   val;

            Random rnd = new Random();
            Random rx  = new Random();
            Random ry  = new Random();

            duplicateValue(fireflies, CBackup, GammaBackup);
            for (int i = 0; i < nFF; i++)
            {
                for (int j = 0; j < nFF; j++)
                {
                    if (j == i) //avoid comparison with the same element
                    {
                        continue;
                    }
                    rF = 0.0;

                    rC = Math.Pow(((double)fireflies[i].cValue - (double)fireflies0[j].cValue), 2);
                    rG = Math.Pow(((double)fireflies[i].GValue - (double)fireflies0[j].GValue), 2);
                    double r = Math.Sqrt(rC + rG); //r -> total distance for both C and Gamma

                    if (Lightn[i] < Lighto[j])
                    {
                        beta0 = 1;                                         //setting beta0 to 1
                        beta  = beta0 * Math.Exp(-gamma * Math.Pow(r, 2)); //the attractiveness parameter for C and Gamma -> beta = beta0 * exp(-gamma * r^2)
                        double rand = rnd.NextDouble();

                        //changing firefly i position for the continuous values - i.e C and Gamma value respectively
                        fireflies[i].cValue = ((double)fireflies[i].cValue * (1 - beta)) + (CBackup[j] * beta) + (alpha * (rnd.NextDouble() - 0.5));
                        fireflies[i].GValue = ((double)fireflies[i].GValue * (1 - beta)) + (GammaBackup[j] * beta) + (alpha * (rnd.NextDouble() - 0.5));

                        //move the individual position of each instance mask
                        for (int k = 0; k < subsetSize; k++)
                        {
                            val = ((double)fireflies[i].__Attribute_Values[k] * (1 - beta)) + (GammaBackup[j] * beta) + (alpha * (rand - 0.5)); //moving position of firefly
                            fireflies[i].__Attribute_Values[k] = Binarize(val, rand);                                                           //convert from discrete to binary
                        }

                        findrange(fireflies[i], minC, maxC, minG, maxG); //restrict the values of C and Gamma to the specified range
                    }
                }
                //if ((double)fireflies[i].cValue != CBackup[i] || (double)fireflies[i].GValue != GammaBackup[i])
                //    changedIndex.Add(i); //saving the index of the firefly that has been moved for the purpose of accuracy calculation. This to reduce the number of computations
            }

            //calculate the new accuracy for the newly updated C and Gamma value
            //ParameterSelection.Grid(prob, param, fireflies, changedIndex, avgAcc, CBackup, GammaBackup, NFOLD);
        }
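        /// <summary>
        /// (Added sketch, not part of the original file) One-coordinate form of the
        /// firefly move used in ffa_move above; beta = beta0 * exp(-gamma * r^2) is the
        /// attractiveness computed there.
        /// </summary>
        public double MoveToward(double xi, double xj, double beta, double alpha, Random rnd)
        {
            return xi * (1 - beta) + xj * beta + alpha * (rnd.NextDouble() - 0.5);
        }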
        /// <summary>
        /// generating the initial locations of n fireflies
        /// </summary>
        public List <ObjectInstanceSelection> init_ffa(int nFF, int subsetSize, int probSize, Problem prob)
        {
            Random     rnd  = new Random();                                  // Random rx = new Random(); Random ry = new Random();
            List <int> rNum = Training.GetRandomNumbers(probSize, probSize); //generate N random numbers

            List <ObjectInstanceSelection> attr_values = new List <ObjectInstanceSelection>();
            int cnt1 = 0, cnt2 = 0, cnt3 = 0;

            //create an array of size n for x and y
            int[] xn = new int[subsetSize];       //instance mask
            int[] pointers = new int[subsetSize]; //array contain pointer to actual individual instance represented in the instance mask
            int   k = 0;

            for (int i = 0; i < nFF; i++)
            {
                xn       = new int[subsetSize];
                pointers = new int[subsetSize];
                cnt1     = 0; cnt2 = 0; cnt3 = 0;
                for (int j = 0; j < prob.Count; j++)
                {
                    if (cnt1 < (0.7 * subsetSize) && prob.Y[j] == 1) //fill 70% of the subset with positive instances
                    {
                        xn[cnt3]       = rnd.Next(0, 2);
                        pointers[cnt3] = rNum[k];
                        k++; cnt1++; cnt3++;
                    }
                    else if (cnt2 < (0.3 * subsetSize) && prob.Y[j] == -1)
                    {
                        xn[cnt3]       = rnd.Next(0, 2);
                        pointers[cnt3] = rNum[k];
                        k++; cnt2++; cnt3++;
                    }
                    if (cnt3 >= subsetSize)
                    {
                        break;
                    }
                }

                ObjectInstanceSelection OI = new ObjectInstanceSelection(0.0, 0.0, xn, pointers);
                attr_values.Add(OI);
            }

            return(attr_values);
        }
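A hedged usage sketch of init_ffa, assuming it lives on the FireflyInstanceSelection class used elsewhere in these examples and that prob is an already-loaded Problem:

            var fa = new FireflyInstanceSelection();                                          //assumed host class of init_ffa
            List<ObjectInstanceSelection> fireflies = fa.init_ffa(5, 100, prob.Count, prob); //5 fireflies, each with a 100-bit instance mask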
        /// <summary>
        /// Main part of the Firefly Algorithm
        /// </summary>
        //public Problem firefly_simple(List<double> avgAcc, List<double> CValues, List<double> GValues, Problem prob)
        public Problem firefly_simple(Problem prob, out double storagePercentage)
        {
            //int nF = 9; //number of instances
            int nI            = prob.X.Count(); //total number of instances in the dataset
            int nFF           = 5;              //number of fireflies. Note: nFF * subsetSize must not exceed the size of the training dataset
            int subsetSize    = 100;            //size of each firefly's instance mask
            int MaxGeneration = 5;              //number of pseudo time steps

            int[] range = new int[4] {
                -5, 5, -5, 5
            };                  //range=[xmin xmax ymin ymax]

            double alpha = 0.2; //Randomness 0--1 (highly random)
            double gamma = 1.0; //Absorption coefficient

            int[]    xn     = new int[subsetSize];
            double[] xo     = new double[subsetSize];
            double[] Lightn = new double[nFF];
            double[] Lighto = new double[nFF];

            double[] fitnessVal = new double[nFF];
            double   globalbestIntensity;
            ObjectInstanceSelection globalBest = null;


            //generating the initial locations of n fireflies
            List <ObjectInstanceSelection> fireflies = init_ffa(nFF, subsetSize, nI, prob);

            ObjectInstanceSelection[] fireflyBackup = new ObjectInstanceSelection[fireflies.Count];
            ObjectInstanceSelection[] fireflyBest   = new ObjectInstanceSelection[fireflies.Count];
            List <int> changedIndex     = new List <int>(); //changedIndex keeps track of the indices of fireflies that have been moved
            double     newBestIntensity = new double();
            int        maxIndex;
            bool       stopSearch = false; //stopSearch is set to true once a firefly with 100% classification accuracy is found

            globalbestIntensity = double.MinValue;

            //Iterations or pseudo time marching
            for (int i = 0; i < MaxGeneration; i++)
            {
                //Evaluate objective function
                fitnessVal = this.EvaluateObjectiveFunction(fireflies, prob); //evaluate objective function for each firefly

                //stop searching if firefly has found the best c and G value that yields 100%
                for (int t = 0; t < fitnessVal.Count(); t++)
                {
                    //double predAccr = avgAcc[changedIndex[t]] * 100;
                    double predAccr = fitnessVal[t] * 100;
                    if (predAccr == 100) //if the prediction accuracy equals 100%, stop searching and select the firefly that yields it
                    {
                        globalBest = fireflies[t]; //fitnessVal[t] corresponds to fireflies[t] (the original indexed changedIndex, which is empty at this point)
                        stopSearch = true;
                        break;
                    }
                }

                //stop searching if firefly has found the best c and G value that yields 100%
                if (stopSearch == true)
                {
                    break;
                }

                //fitnessVal = this.EvaluateObjectiveFunction(fireflies, avgAcc, prob); //evaluate objective function for each firefly
                newBestIntensity = fitnessVal.Max(); //get the firefly with the highest light intensity
                if (newBestIntensity > globalbestIntensity)
                {
                    globalbestIntensity = newBestIntensity;
                    maxIndex            = Array.IndexOf(fitnessVal, newBestIntensity); //select the index for the global best
                    globalBest          = fireflies[maxIndex];                         //select the global best firefly
                    //bestC = (double)fireflies[maxIndex].cValue; //save the C value for the global best
                    //bestGamma = (double)fireflies[maxIndex].GValue; //save the Gamma for the global best
                }

                fireflies.CopyTo(fireflyBackup); fitnessVal.CopyTo(Lighto, 0); fitnessVal.CopyTo(Lightn, 0); //creating duplicates
                //Lightn.CopyTo(Lighto, 0);

                changedIndex.Clear();
                ffa_move(Lightn, fireflyBackup, Lighto, alpha, gamma, fireflies, prob);

                fireflies.CopyTo(fireflyBackup); //backing up the current positions of the fireflies
                Lightn.CopyTo(Lighto, 0);        //backing up the current intensities of the fireflies
            }

            //ensure that at least Min instances are selected for classification
            int countSelected = globalBest.__Attribute_Values.Count(q => q == 1); //count the total number of selected instances
            int diff, c = 0, d = 0;
            int Min = 15;                                                         //minimum number of selected instances

            if (countSelected < Min)
            {
                diff = Min - countSelected;
                //if fewer than Min instances are selected, flip unselected entries to 1 until the count reaches Min
                while (c < diff)
                {
                    if (globalBest.__Attribute_Values[d] == 0) //only flip entries that are not yet selected
                    {
                        globalBest.__Attribute_Values[d] = 1;
                        c++;
                    }
                    d++; //advance exactly once per pass (the original incremented d in both branches, skipping entries)
                }
            }

            Problem subBest = buildModelMultiClass(globalBest, prob);      //build the model for the best instance mask

            storagePercentage = Training.StoragePercentage(subBest, prob); //calculate the percentage of the original training set retained by the reduction algorithm

            return(subBest);
        }
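A hedged usage sketch of firefly_simple, assuming the same FireflyInstanceSelection host class and an already-loaded training Problem named trainSet:

            double storagePercentage;
            Problem reduced = new FireflyInstanceSelection().firefly_simple(trainSet, out storagePercentage); //reduce the training set
            Console.WriteLine("Retained " + storagePercentage + "% of the original training set");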
Example #19
0
File: Solver.cs Project: wendelad/RecSys
        // Return parameter of a Laplace distribution
        private static double svm_svr_probability(Problem prob, Parameter param)
        {
            int i;
            int nr_fold = 5;
            double[] ymv = new double[prob.Count];
            double mae = 0;

            Parameter newparam = (Parameter)param.Clone();
            newparam.Probability = false;
            svm_cross_validation(prob, newparam, nr_fold, ymv);
            for (i = 0; i < prob.Count; i++)
            {
                ymv[i] = prob.Y[i] - ymv[i];
                mae += Math.Abs(ymv[i]);
            }
            mae /= prob.Count;
            double std = Math.Sqrt(2 * mae * mae);
            int count = 0;
            mae = 0;
            for (i = 0; i < prob.Count; i++)
                if (Math.Abs(ymv[i]) > 5 * std)
                    count = count + 1;
                else
                    mae += Math.Abs(ymv[i]);
            mae /= (prob.Count - count);
            Procedures.info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=" + mae + "\n");
            return mae;
        }
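The value returned above is the sigma of a Laplace noise model for the residual z = target value - predicted value. A small sketch of evaluating that density, with the helper name LaplaceDensity being illustrative:

        //Laplace density e^(-|z|/sigma) / (2*sigma), with sigma taken from svm_svr_probability
        private static double LaplaceDensity(double z, double sigma)
        {
            return Math.Exp(-Math.Abs(z) / sigma) / (2 * sigma);
        }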
        /// <summary>
        /// Evaluate Objective Function
        /// </summary>
        //public double[] EvaluateObjectiveFunction(List<ObjectInstanceSelection> fireflies, List<double> accuracy, Problem prob)
        public double[] EvaluateObjectiveFunction(List <ObjectInstanceSelection> fireflies, Problem prob)
        {
            int NF  = fireflies.Count;                                 //NF -> number of fireflies
            int tNI = fireflies.ElementAt(0).Attribute_Values.Count(); //size of each Instance Mask

            double[] fitness = new double[NF];
            int      sum;


            List <double> y = new List <double>();
            List <Node[]> x = new List <Node[]>();

            double C, Gamma;

            for (int i = 0; i < NF; i++)
            {
                //building model for each instance in instance mask in each firefly object
                Problem subProb = buildModel(fireflies.ElementAt(i), prob);

                Parameter param = new Parameter();
                if (subProb != null)
                {
                    int countP = subProb.Y.Count(k => k == 1);  //count the total number of positive instances in the subproblem
                    int countN = subProb.Y.Count(k => k == -1); //count the total number of negative instances in the subproblem

                    if (countN <= 1 || countP <= 1)             //ensure that there are at least two positive and two negative instances in the subproblem
                    {
                        int m = 0;
                        if (countN <= 1)
                        {
                            for (int k = 0; k < prob.Count; k++) //too few negative instances: search the whole problem and place two negative instances in the first and second positions of the subproblem
                            {
                                if (prob.Y[k] == -1)
                                {
                                    subProb.X[m] = prob.X[k]; //insert a negative instance
                                    subProb.Y[m] = prob.Y[k]; //insert its label
                                    m++;
                                }
                                if (m == 2)
                                {
                                    break;
                                }
                            }
                        }
                        else if (countP <= 1)
                        {
                            for (int k = 0; k < prob.Count; k++) //too few positive instances: search the whole problem and place two positive instances in the first and second positions of the subproblem
                            {
                                if (prob.Y[k] == 1)
                                {
                                    subProb.X[m] = prob.X[k]; //insert a positive instance
                                    subProb.Y[m] = prob.Y[k]; //insert its label
                                    m++;
                                }
                                if (m == 2)
                                {
                                    break;
                                }
                            }
                        }
                    }

                    Problem subP = Training.ClusteringBoundaryInstance(subProb);

                    int c = subP.Count;

                    int    count = fireflies.ElementAt(i).__Attribute_Values.Count(q => q == 1); //total number of selected instances
                    double percentageReduction = 100.0 * (tNI - count) / tNI;                    //percentage reduction for the instance mask, computed in floating point (the original integer division truncated the result)
                    fitness[i] = percentageReduction;


                    /*
                     * ParameterSelection.Grid(subProb, param, "params.txt", out C, out Gamma); //select parameters for each subset
                     * param.C = C;
                     * param.Gamma = Gamma;
                     * Model subModel = Training.Train(subProb, param); //train each subset
                     * double accr = Prediction.Predict(prob, "ClassificationResults.txt", subModel, false); //use each subset to classify train dataset
                     * sum = 0;
                     * for (int j = 0; j < tNI; j++)
                     *  sum += fireflies.ElementAt(i).Attribute_Values[j];
                     *
                     * fitness[i] = W_SVM * accr + W_Features * (double)(1 - ((double)sum / (double)tNI)); //fitness evaluation for individual firefly
                     * //fitness[i] = accuracy[i] + W_Features * (double)(1 - ((double)sum / (double)tNFe)); //fitness evaluation for individual firefly
                     */

                    /*
                     * for (int j = 0; j < tNI; j++)
                     * {
                     * if (fireflies.ElementAt(i).__Attribute_Values[j] == 1) //if instance is selected, use for classification
                     * {
                     *  int p = fireflies.ElementAt(i).__Pointers[j];
                     *  x.Add(prob.X[p]);
                     *  y.Add(prob.Y[p]);
                     * }
                     * else
                     *  continue;
                     * }
                     *
                     * Node[][] X = new Node[x.Count][];
                     * double[] Y = new double[y.Count];
                     *
                     * x.CopyTo(X); //convert from list to double[] array
                     * y.CopyTo(Y);
                     *
                     *
                     * Problem subProb = new Problem(X.Count(), Y, X, X[0].GetLength(0));
                     */
                }
            }

            return(fitness);
        }
Example #21
0
        //flower pollination algorithm by Yang
        public Problem BinaryFlowerPollination(Problem prob, out double storagePercentage)
        {
            int    nargin = 0, totalInstances = prob.X.Count();
            int    maxGeneration     = 3;
            int    numOfFlower       = 3;   //population size
            int    subsetSize        = 100; //dimension for each flower
            double probabilitySwitch = 0.8; //assign probability switch

            double[] flowerFitnessVal    = new double[numOfFlower];
            double[] newFlowerFitnessVal = new double[numOfFlower];

            double globalBest = double.MinValue;
            double newBest    = new double();
            ObjectInstanceSelection globalBestFlower = null;
            int lowerBound = -2; //lower boundary of the search space
            int upperBound = 2;  //upper boundary of the search space
            int maxIndex;

            //initialize flowers and get the global best
            List <ObjectInstanceSelection> flowers    = InitializeBinaryFlower(numOfFlower, subsetSize, totalInstances, prob); //initialize solution
            List <ObjectInstanceSelection> newFlowers = new List <ObjectInstanceSelection>(flowers.Count);                     //create a clone of flowers

            flowers.ForEach((item) =>
            {
                newFlowers.Add(new ObjectInstanceSelection(item.__Attribute_Values, item.__Attribute_Values_Continuous, item.__Pointers, item.__Fitness)); //create a clone of flowers
            });

            flowerFitnessVal    = EvaluateObjectiveFunction(flowers, prob);                                                                //evaluate fitness value for all the flowers
            newFlowerFitnessVal = EvaluateObjectiveFunction(newFlowers, prob);                                                             //evaluate fitness value for new flowers. Note: identical at this point, since pollination has not occurred yet
            FlowerFitness(flowerFitnessVal, flowers);                                                                                      //fitness value for each flower
            FlowerFitness(newFlowerFitnessVal, newFlowers);                                                                                //fitness value for new flower
            globalBestFlower = EvaluateSolution(flowerFitnessVal, newFlowerFitnessVal, globalBest, flowers, newFlowers, globalBestFlower); //get the global best flower
            globalBest       = flowerFitnessVal.Max();

            //start flower algorithm
            Random r = new Random(); int x = 0;

            double[] levy = new double[subsetSize];
            for (int i = 0; i < maxGeneration; i++)
            {
                double rand = r.NextDouble();
                if (rand > probabilitySwitch) //do global pollination, to produce new pollen solution
                {
                    levy = LevyFlight(subsetSize);
                    for (int j = 0; j < numOfFlower; j++)
                    {
                        for (int k = 0; k < subsetSize; k++)
                        {
                            double A = levy[k] * (flowers[j].Attribute_Values[k] - globalBestFlower.Attribute_Values[k]);
                            double B = flowers[j].Attribute_Values[k] + A; //new pollen solution
                            //double A = levy[k] * (flowers[j].Attribute_Values_Continuous[k] - globalBestFlower.Attribute_Values_Continuous[k]);
                            //double B = flowers[j].Attribute_Values_Continuous[k] + A;
                            newFlowers[j].Attribute_Values[k] = ConvertToBinary(B, r.NextDouble()); //convert to binary

                            //newFlowers[j].__Attribute_Values[k] = TransferFunction(B, newFlowers[j].__Attribute_Values[k]); //update flower position in the binary space
                        }
                        List <int> randNum = Training.GetRandomNumbers(2, numOfFlower); //generate 2 distinct random numbers
                        for (int k = 0; k < subsetSize; k++)
                        {
                            double A = flowers[j].Attribute_Values[k] + (r.NextDouble() * (flowers[randNum[0]].Attribute_Values[k] - flowers[randNum[1]].Attribute_Values[k])); //randomly select two flowers from neighbourhood for pollination
                            //double A = flowers[j].Attribute_Values_Continuous[k] + r.NextDouble() * (flowers[randNum[0]].Attribute_Values_Continuous[k] - flowers[randNum[1]].Attribute_Values_Continuous[k]); //randomly select two flowers from neighbourhood for pollination
                            newFlowers[j].Attribute_Values[k] = ConvertToBinary(A, r.NextDouble());                                                                             //convert to binary

                            //newFlowers[j].__Attribute_Values[k] = TransferFunction(A, newFlowers[j].__Attribute_Values[k]); //update flower position in the binary space
                        }
                    }
                }
                else //do local pollination, to produce a new pollen solution
                {
                    for (int j = 0; j < numOfFlower; j++)
                    {
                        List <int> randNum = Training.GetRandomNumbers(2, numOfFlower); //generate 2 distinct random numbers
                        for (int k = 0; k < subsetSize; k++)
                        {
                            double A = flowers[j].Attribute_Values[k] + r.NextDouble() * (flowers[randNum[0]].Attribute_Values[k] - flowers[randNum[1]].Attribute_Values[k]); //randomly select two flowers from neighbourhood for pollination
                            //double A = flowers[j].Attribute_Values_Continuous[k] + r.NextDouble() * (flowers[randNum[0]].Attribute_Values_Continuous[k] - flowers[randNum[1]].Attribute_Values_Continuous[k]); //randomly select two flowers from neighbourhood for pollination
                            newFlowers[j].Attribute_Values[k] = ConvertToBinary(A, r.NextDouble());                                                                           //convert to binary

                            //newFlowers[j].__Attribute_Values[k] = TransferFunction(A, newFlowers[j].__Attribute_Values[k]); //update flower position in the binary space
                        }
                    }
                }

                //Select best solutions from the original population and matured population for the next generation;
                SelectBestSolution(flowers, newFlowers);

                //evaluate new solution
                newFlowerFitnessVal = EvaluateObjectiveFunction(newFlowers, prob);                                                             //evaluate fitness value for all the flowers
                FlowerFitness(newFlowerFitnessVal, newFlowers);                                                                                //fitness value for new flower
                globalBestFlower = EvaluateSolution(flowerFitnessVal, newFlowerFitnessVal, globalBest, flowers, newFlowers, globalBestFlower); //Evaluate solution, update better solution and get global best flower
                globalBest       = globalBestFlower.Fitness;

                //if the solution has converged to a user-defined optimal point, stop the search
                int Max = 60;          //maximum percentage reduction
                if (globalBest >= Max) //if the percentage reduction has reached 60%, stop the search
                {
                    break;
                }
            }

            //ensure that at least min instances are selected for classification
            int min = 15; //minimum number of selected instances

            globalBestFlower = AddInstances(globalBestFlower, min);

            Problem subBest = fi.buildModelMultiClass(globalBestFlower, prob); //build the model for the best instance mask

            storagePercentage = Training.StoragePercentage(subBest, prob);     //calculate the percentage of the original training set retained by the reduction algorithm
            return(subBest);
        }
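LevyFlight is called above but its body is not part of this example. A common Mantegna-style sketch of a Levy step generator with exponent beta = 1.5, stated as an assumption rather than the implementation actually used here:

        //a Mantegna-style Levy step generator (beta = 1.5, sigma precomputed for that exponent); an assumed sketch, not the original LevyFlight body
        public static double[] LevyFlight(int n)
        {
            const double sigma = 0.6966; //Mantegna scale factor for beta = 1.5
            Random rnd = new Random();
            double[] step = new double[n];
            for (int i = 0; i < n; i++)
            {
                double u = Gaussian(rnd) * sigma;
                double v = Gaussian(rnd);
                step[i] = u / Math.Pow(Math.Abs(v), 1.0 / 1.5); //heavy-tailed step size
            }
            return step;
        }

        private static double Gaussian(Random rnd) //standard normal draw via the Box-Muller transform
        {
            double u1 = 1.0 - rnd.NextDouble(), u2 = rnd.NextDouble();
            return Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2);
        }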
Example #22
0
File: Solver.cs Project: wendelad/RecSys
        private static void solve_nu_svc(Problem prob, Parameter param,
                        double[] alpha, Solver.SolutionInfo si)
        {
            int i;
            int l = prob.Count;
            double nu = param.Nu;

            sbyte[] y = new sbyte[l];

            for (i = 0; i < l; i++)
                if (prob.Y[i] > 0)
                    y[i] = +1;
                else
                    y[i] = -1;

            double sum_pos = nu * l / 2;
            double sum_neg = nu * l / 2;

            for (i = 0; i < l; i++)
                if (y[i] == +1)
                {
                    alpha[i] = Math.Min(1.0, sum_pos);
                    sum_pos -= alpha[i];
                }
                else
                {
                    alpha[i] = Math.Min(1.0, sum_neg);
                    sum_neg -= alpha[i];
                }

            double[] zeros = new double[l];

            for (i = 0; i < l; i++)
                zeros[i] = 0;

            Solver_NU s = new Solver_NU();
            s.Solve(l, new SVC_Q(prob, param, y), zeros, y, alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);
            double r = si.r;

            Procedures.info("C = " + 1 / r + "\n");

            for (i = 0; i < l; i++)
                alpha[i] *= y[i] / r;

            si.rho /= r;
            si.obj /= (r * r);
            si.upper_bound_p = 1 / r;
            si.upper_bound_n = 1 / r;
        }
Example #23
0
        //flower pollination algorithm by Yang
        public Problem FlowerPollination(Problem prob)
        {
            int    nargin = 0, totalInstances = prob.X.Count(), maxGeneration = 500;
            int    numOfFlower       = 10;  //population size
            double probabilitySwitch = 0.8; //assign probability switch
            int    subsetSize        = 200; //dimension for each flower

            double[] flowerFitnessVal    = new double[numOfFlower];
            double[] newFlowerFitnessVal = new double[numOfFlower];
            FireflyInstanceSelection fw  = new FireflyInstanceSelection();
            double globalBest            = double.MinValue;
            double newBest = new double();
            ObjectInstanceSelection globalBestFlower = null;
            int lowerBound = -2; //lower boundary of the search space
            int upperBound = 2;  //upper boundary of the search space
            int maxIndex;

            //initialize flowers and get the global best
            List <ObjectInstanceSelection> flowers    = InitializeFlower(numOfFlower, subsetSize, totalInstances, prob); //initialize solution
            List <ObjectInstanceSelection> newFlowers = new List <ObjectInstanceSelection>(flowers.Count);               //create a clone of flowers

            flowers.ForEach((item) =>
            {
                newFlowers.Add(new ObjectInstanceSelection(item.__Attribute_Values, item.__Attribute_Values_Continuous, item.__Pointers, item.__Fitness)); //create a clone of flowers
            });

            flowerFitnessVal    = fw.EvaluateObjectiveFunction(flowers, prob);                                                             //evaluate fitness value for all the flowers
            newFlowerFitnessVal = fw.EvaluateObjectiveFunction(newFlowers, prob);                                                          //evaluate fitness value for new flowers. Note: identical at this point, since pollination has not occurred yet
            FlowerFitness(flowerFitnessVal, flowers);                                                                                      //fitness value for each flower
            FlowerFitness(newFlowerFitnessVal, newFlowers);                                                                                //fitness value for new flower
            globalBestFlower = EvaluateSolution(flowerFitnessVal, newFlowerFitnessVal, globalBest, flowers, newFlowers, globalBestFlower); //get the global best flower
            globalBest       = flowerFitnessVal.Max();

            //start flower algorithm
            Random r = new Random();

            double[] levy = new double[subsetSize];
            for (int i = 0; i < maxGeneration; i++)
            {
                double rand = r.NextDouble();
                if (rand > probabilitySwitch) //global pollination
                {
                    //global pollination
                    for (int j = 0; j < numOfFlower; j++)
                    {
                        levy = LevyFlight(subsetSize);
                        for (int k = 0; k < subsetSize; k++)
                        {
                            double A = levy[k] * (flowers[j].__Attribute_Values_Continuous[k] - globalBestFlower.__Attribute_Values_Continuous[k]);
                            double B = flowers[j].__Attribute_Values_Continuous[k] + A;
                            A = SimpleBounds(B, lowerBound, upperBound);                                     //ensure that value does not go beyond defined boundary
                            newFlowers[j].__Attribute_Values_Continuous[k] = A;
                            newFlowers[j].__Attribute_Values[k]            = fw.Binarize(B, r.NextDouble()); //convert to binary
                        }
                    }
                }
                else //local pollination
                {
                    for (int j = 0; j < numOfFlower; j++)
                    {
                        List <int> randNum = Training.GetRandomNumbers(2, numOfFlower); //generate 2 distinct random numbers
                        double     epsilon = rand;

                        //local pollination
                        for (int k = 0; k < subsetSize; k++)
                        {
                            double A = flowers[j].__Attribute_Values_Continuous[k] + epsilon * (flowers[randNum[0]].__Attribute_Values_Continuous[k] - flowers[randNum[1]].__Attribute_Values_Continuous[k]); //randomly select two flowers from neighbourhood for pollination
                            A = SimpleBounds(A, lowerBound, upperBound);                                                                                                                                      //ensure that value does not exceed defined boundary
                            newFlowers[j].__Attribute_Values_Continuous[k] = A;                                                                                                                               //save computation
                            newFlowers[j].__Attribute_Values[k]            = fw.Binarize(A, r.NextDouble());                                                                                                  //convert to binary
                        }
                    }
                }

                //evaluate new solution
                newFlowerFitnessVal = fw.EvaluateObjectiveFunction(newFlowers, prob);                                                          //evaluate fitness value for all the flowers
                FlowerFitness(newFlowerFitnessVal, newFlowers);                                                                                //fitness value for new flower
                globalBestFlower = EvaluateSolution(flowerFitnessVal, newFlowerFitnessVal, globalBest, flowers, newFlowers, globalBestFlower); //Evaluate solution, update better solution and get global best flower
                globalBest       = flowerFitnessVal.Max();
            }

            //ensure that at least Min instances are selected for classification
            int countSelected = globalBestFlower.__Attribute_Values.Count(q => q == 1); //count the total number of selected instances
            int diff, c = 0, d = 0;
            int Min = 40;                                                               //minimum number of selected instances

            if (countSelected < Min)
            {
                //if fewer than Min instances are selected, flip unselected entries to 1 until the count reaches Min
                diff = Min - countSelected;
                while (c < diff)
                {
                    if (globalBestFlower.__Attribute_Values[d] == 0) //only flip entries that are not yet selected
                    {
                        globalBestFlower.__Attribute_Values[d] = 1;
                        c++;
                    }
                    d++; //advance exactly once per pass (the original incremented d in both branches, skipping entries)
                }
            }

            Problem subBest = fw.buildModel(globalBestFlower, prob); //build the model for the best instance mask

            return(subBest);
        }
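SimpleBounds is used above to keep coordinates inside [lowerBound, upperBound] but is not shown in this example; a plausible clamp, offered as an assumption:

        //clamp a coordinate to [lower, upper]; an assumed sketch of the SimpleBounds helper referenced above
        public static double SimpleBounds(double value, double lower, double upper)
        {
            return Math.Max(lower, Math.Min(upper, value));
        }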
Example #24
0
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  Uses the default search ranges for C and Gamma.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="validation">The validation data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="outputFile">The output file for the parameter results</param>
 /// <param name="C">The optimal C value will be placed in this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
 public static void Grid(
     Problem problem,
     Problem validation,
     Parameter parameters,
     string outputFile,
     out double C,
     out double Gamma)
 {
     Grid(problem, validation, parameters, GetList(MIN_C, MAX_C, C_STEP), GetList(MIN_G, MAX_G, G_STEP), outputFile, out C, out Gamma);
 }
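A hedged usage sketch of this overload, assuming trainSet and validationSet are already-loaded Problems and "params.txt" is a writable log path:

     Parameter parameters = new Parameter();
     parameters.KernelType = KernelType.RBF;
     double C, Gamma;
     ParameterSelection.Grid(trainSet, validationSet, parameters, "params.txt", out C, out Gamma);
     parameters.C = C;      //apply the best combination found
     parameters.Gamma = Gamma;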
Example #25
0
        /// <summary>
        /// Evaluate Objective Function
        /// </summary>
        public double[] EvaluateObjectiveFunction(List <ObjectInstanceSelection> Flowers, Problem prob)
        {
            int NB  = Flowers.Count;                                 //NB -> number of flowers
            int tNI = Flowers.ElementAt(0).Attribute_Values.Count(); //size of each Instance Mask

            double[] fitness = new double[NB];
            int      sum;


            List <double> y = new List <double>();
            List <Node[]> x = new List <Node[]>();

            double C, Gamma;

            for (int i = 0; i < NB; i++)
            {
                //building model for each instance in instance mask in each firefly object
                Problem subProb = fi.buildModel(Flowers.ElementAt(i), prob);

                Parameter param = new Parameter();
                if (subProb != null)
                {
                    int countP = subProb.Y.Count(k => k == 1);  //count the total number of positive instances in the subproblem
                    int countN = subProb.Y.Count(k => k == -1); //count the total number of negative instances in the subproblem

                    if (countN <= 1 || countP <= 1)             //ensure that there are at least two positive and two negative instances in the subproblem
                    {
                        int m = 0;
                        if (countN <= 1)
                        {
                            for (int k = 0; k < prob.Count; k++) //too few negative instances: search the whole problem and place two negative instances in the first and second positions of the subproblem
                            {
                                if (prob.Y[k] == -1)
                                {
                                    subProb.X[m] = prob.X[k]; //insert a negative instance
                                    subProb.Y[m] = prob.Y[k]; //insert its label
                                    m++;
                                }
                                if (m == 2)
                                {
                                    break;
                                }
                            }
                        }
                        else if (countP <= 1)
                        {
                            for (int k = 0; k < prob.Count; k++) //too few positive instances: search the whole problem and place two positive instances in the first and second positions of the subproblem
                            {
                                if (prob.Y[k] == 1)
                                {
                                    subProb.X[m] = prob.X[k]; //insert a positive instance
                                    subProb.Y[m] = prob.Y[k]; //insert its label
                                    m++;
                                }
                                if (m == 2)
                                {
                                    break;
                                }
                            }
                        }
                    }

                    Problem subP                  = Training.ClusteringBoundaryInstance(subProb);
                    int     count                 = Flowers.ElementAt(i).Attribute_Values.Count(q => q == 1); //total number of selected instances
                    double  perRedBInstances      = ((double)subProb.Count / (double)subP.Count);             //ratio of the subset size to the number of boundary instances
                    double  perRedFlowerInstances = (double)(tNI - count) / tNI;                              //fraction of the instance mask that was deselected
                    //fitness[i] = (100 * perRedFlowerInstances);
                    fitness[i] = (100 * perRedFlowerInstances) + perRedBInstances;                            //reward instance reduction, plus a small boundary-instance bonus
                    //fitness[i] = 100 * ((double)count / (double)tNI);
                }
            }

            return(fitness);
        }
Example #26
0
        private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            Problem problem = new Problem(_X.Count, _Y.ToArray(), _X.ToArray(), 2);
            RangeTransform range = RangeTransform.Compute(problem);
            problem = range.Scale(problem);

            Parameter param = new Parameter();
            param.C = 2;
            param.Gamma = .5;
            Model model = Training.Train(problem, param);

            Model.Write("model.txt", model);

            int rows = ClientSize.Height;
            int columns = ClientSize.Width;
            Bitmap image = new Bitmap(columns, rows);
            int centerR = rows / 2;
            int centerC = columns / 2;
            BitmapData buf = image.LockBits(new Rectangle(0, 0, columns, rows), ImageLockMode.WriteOnly, PixelFormat.Format24bppRgb);
            unsafe
            {
                byte* ptr = (byte*)buf.Scan0;
                int stride = buf.Stride;

                for (int r = 0; r < rows; r++)
                {
                    byte* scan = ptr;
                    for (int c = 0; c < columns; c++)
                    {
                        int x = c - centerC;
                        int y = r - centerR;
                        Node[] test = new Node[] { new Node(1, x), new Node(2, y) };
                        test = range.Transform(test);
                        int assignment = (int)Prediction.Predict(model, test);
                        //int assignment = (int)Prediction.Predict(problem, "predict.txt", model, test);

                        *scan++ = CLASS_FILL[assignment].B;
                        *scan++ = CLASS_FILL[assignment].G;
                        *scan++ = CLASS_FILL[assignment].R;
                    }
                    ptr += stride;
                }
            }
            image.UnlockBits(buf);
            lock (this)
            {
                _canvas = new Bitmap(image);
            }
        }
Example #27
0
        /// <summary>
        /// generating the initial locations of n flowers
        /// </summary>
        public List <ObjectInstanceSelection> InitializeFlower(int nFlower, int subsetSize, int probSize, Problem prob)
        {
            Random     rnd  = new Random();
            List <int> rNum = Training.GetRandomNumbers(probSize, probSize); //generate N random numbers
            FireflyInstanceSelection fpa = new FireflyInstanceSelection();

            List <ObjectInstanceSelection> attr_values = new List <ObjectInstanceSelection>();
            int cnt1 = 0, cnt2 = 0, cnt3 = 0;

            //create an array of size n for x and y
            int[]    xn = new int[subsetSize];        //instance mask
            double[] xn_Con = new double[subsetSize]; //instance mask continuous
            int[]    pointers = new int[subsetSize];  //array of pointers to the actual instances represented in the instance mask
            int      k = 0;

            for (int i = 0; i < nFlower; i++)
            {
                xn       = new int[subsetSize];
                xn_Con   = new double[subsetSize];
                pointers = new int[subsetSize];
                cnt1     = 0; cnt2 = 0; cnt3 = 0;
                for (int j = 0; j < prob.Count; j++)
                {
                    if (cnt1 < (0.7 * subsetSize) && prob.Y[rNum[j]] == 1) //fill 70% of the subset with positive instances
                    {
                        xn_Con[cnt3]   = rnd.NextDouble();
                        xn[cnt3]       = fpa.Binarize(xn_Con[cnt3], rnd.NextDouble()); //convert generated random number to binary
                        pointers[cnt3] = rNum[j];
                        k++; cnt1++; cnt3++;
                    }
                    else if (cnt2 < (0.3 * subsetSize) && prob.Y[rNum[j]] == -1)
                    {
                        xn_Con[cnt3]   = rnd.NextDouble();
                        xn[cnt3]       = fpa.Binarize(xn_Con[cnt3], rnd.NextDouble()); //convert generated random number to binary
                        pointers[cnt3] = rNum[j];
                        k++; cnt2++; cnt3++;
                    }
                    if (cnt3 >= subsetSize)
                    {
                        break;
                    }
                }

                ObjectInstanceSelection OI = new ObjectInstanceSelection(xn, xn_Con, pointers, 0.0);
                attr_values.Add(OI);
            }

            return(attr_values);
        }
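Binarize maps a continuous coordinate to one bit of the instance mask; a common sigmoid-threshold sketch, stated as an assumption since the actual implementation is not shown in this example:

        //sigmoid transfer: select the instance (1) when the uniform draw falls below S(x); an assumed form of Binarize
        public int Binarize(double x, double rand)
        {
            double s = 1.0 / (1.0 + Math.Exp(-x)); //logistic transfer function
            return rand < s ? 1 : 0;
        }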
Example #28
0
        ///
        public override void LearnAttributeToFactorMapping()
        {
            var svm_features = new List<Node[]>();
            var relevant_items  = new List<int>();
            for (int i = 0; i < MaxItemID + 1; i++)
            {
                // ignore items w/o collaborative data
                if (Feedback.ItemMatrix[i].Count == 0)
                    continue;
                // ignore items w/o attribute data
                if (item_attributes[i].Count == 0)
                    continue;

                svm_features.Add( CreateNodes(i) );
                relevant_items.Add(i);
            }

            // TODO proper random seed initialization

            Node[][] svm_features_array = svm_features.ToArray();
            var svm_parameters = new Parameter();
            svm_parameters.SvmType = SvmType.EPSILON_SVR;
            //svm_parameters.SvmType = SvmType.NU_SVR;
            svm_parameters.C     = this.c;
            svm_parameters.Gamma = this.gamma;

            models = new Model[num_factors];
            for (int f = 0; f < num_factors; f++)
            {
                double[] targets = new double[svm_features.Count];
                for (int i = 0; i < svm_features.Count; i++)
                {
                    int item_id = relevant_items[i];
                    targets[i] = item_factors[item_id, f];
                }

                Problem svm_problem = new Problem(svm_features.Count, targets, svm_features_array, NumItemAttributes - 1);
                models[f] = SVM.Training.Train(svm_problem, svm_parameters);
            }

            _MapToLatentFactorSpace = Utils.Memoize<int, float[]>(__MapToLatentFactorSpace);
        }
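A sketch of the corresponding mapping step, assuming __MapToLatentFactorSpace runs each per-factor SVR model trained above over the item's attribute nodes (CreateNodes, models, and num_factors come from the surrounding class):

        protected float[] __MapToLatentFactorSpace(int item_id)
        {
            Node[] features = CreateNodes(item_id);                              //attribute vector for the item
            var factors = new float[num_factors];
            for (int f = 0; f < num_factors; f++)
                factors[f] = (float)SVM.Prediction.Predict(models[f], features); //one epsilon-SVR per latent factor
            return factors;
        }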
Example #29
0
        private static void parseCommandLine(string[] args, out Parameter parameters, out Problem problem, out bool crossValidation, out int nrfold, out string modelFilename)
        {
            int i;

            parameters = new Parameter();
            // default values

            crossValidation = false;
            nrfold          = 0;

            // parse options
            for (i = 0; i < args.Length; i++)
            {
                if (args[i][0] != '-')
                {
                    break;
                }
                ++i;
                switch (args[i - 1][1])
                {
                case 's':
                    parameters.SvmType = (SvmType)int.Parse(args[i]);
                    break;

                case 't':
                    parameters.KernelType = (KernelType)int.Parse(args[i]);
                    break;

                case 'd':
                    parameters.Degree = int.Parse(args[i]);
                    break;

                case 'g':
                    parameters.Gamma = double.Parse(args[i]);
                    break;

                case 'r':
                    parameters.Coefficient0 = double.Parse(args[i]);
                    break;

                case 'n':
                    parameters.Nu = double.Parse(args[i]);
                    break;

                case 'm':
                    parameters.CacheSize = double.Parse(args[i]);
                    break;

                case 'c':
                    parameters.C = double.Parse(args[i]);
                    break;

                case 'e':
                    parameters.EPS = double.Parse(args[i]);
                    break;

                case 'p':
                    parameters.P = double.Parse(args[i]);
                    break;

                case 'h':
                    parameters.Shrinking = int.Parse(args[i]) == 1;
                    break;

                case 'b':
                    parameters.Probability = int.Parse(args[i]) == 1;
                    break;

                case 'v':
                    crossValidation = true;
                    nrfold          = int.Parse(args[i]);
                    if (nrfold < 2)
                    {
                        throw new ArgumentException("n-fold cross validation: n must >= 2");
                    }
                    break;

                case 'w':
                    parameters.Weights[int.Parse(args[i - 1].Substring(2))] = double.Parse(args[i]); //weight value comes from the current argument (the original mistakenly read args[1])
                    break;

                default:
                    throw new ArgumentException("Unknown Parameter");
                }
            }

            // determine filenames

            if (i >= args.Length)
            {
                throw new ArgumentException("No input file specified");
            }

            problem = Problem.Read(args[i]);

            if (parameters.Gamma == 0)
            {
                parameters.Gamma = 1.0 / problem.MaxIndex;
            }

            if (i < args.Length - 1)
            {
                modelFilename = args[i + 1];
            }
            else
            {
                int p = args[i].LastIndexOf('/') + 1;
                modelFilename = args[i].Substring(p) + ".model";
            }
        }
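An illustrative invocation of the parser above, assuming the libsvm option numbering ("-s 0" for C_SVC, "-t 2" for RBF); the file name is an example only:

            Parameter parameters; Problem problem; bool crossValidation; int nrfold; string modelFilename;
            string[] args = { "-s", "0", "-t", "2", "-c", "1", "-g", "0.5", "train.txt" }; //illustrative arguments
            parseCommandLine(args, out parameters, out problem, out crossValidation, out nrfold, out modelFilename);
            //with no explicit model file, modelFilename defaults to "train.txt.model"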
Example #30
0
        /// <summary>
        /// generating the initial locations of n cuckoos
        /// </summary>
        public List <ObjectInstanceSelection> InitializeBinaryCuckoo(int nNests, int subsetSize, int probSize, Problem prob)
        {
            //Random rnd = new Random();
            //List<int> rNum = Training.GetRandomNumbers(probSize, probSize); //generate N random numbers

            List <ObjectInstanceSelection> attr_values = new List <ObjectInstanceSelection>();
            //int cnt1 = 0, cnt2 = 0, cnt3 = 0;
            //create an array of size n for x and y
            Random rnd = new Random();

            //List<int> rNum = Training.GetRandomNumbers(probSize, probSize); //generate N random numbers
            int[]    xn     = new int[subsetSize];    //instance mask
            double[] xn_Con = new double[subsetSize]; //instance mask continuous

            //int[] pointers = new int[subsetSize]; //array contain pointer to actual individual instance represented in the instance mask
            List <double> classes = fi.getClassLabels(prob.Y); //get the class labels
            int           nClass  = classes.Count;
            int           div     = subsetSize / nClass;

            //double freq = new double(); //initialize the frequency of all the bats to zero
            //double[] vel = new double[subsetSize]; //initialize the velocity of all the bats to zero

            //select pointers to instances for all the particles


            //int k = 0;
            if (nClass > 2)                                                                              //do this for multi-class problems
            {
                int[] pointers = Training.AssignClassPointers_MultipleClass(prob, subsetSize, probSize); //array of pointers to the actual instances represented in the instance mask
                for (int a = 0; a < nNests; a++)
                {
                    xn     = new int[subsetSize];    //instance mask
                    xn_Con = new double[subsetSize]; //instance mask continuous

                    for (int j = 0; j < subsetSize; j++)
                    {
                        xn[j] = rnd.Next(0, 2);
                    }

                    //Training.InstanceMask_MultipleClass(prob, subsetSize, probSize, out xn); //initialize instance mask
                    ObjectInstanceSelection OI = new ObjectInstanceSelection(xn, xn_Con, pointers, 0.0);
                    attr_values.Add(OI);
                }
            }
            else //do this for binary class problem
            {
                int[] pointers = Training.AssignClassPointersBinary(prob, probSize, subsetSize); //array of pointers to the actual instances represented in the instance mask
                for (int i = 0; i < nNests; i++)
                {
                    xn     = new int[subsetSize];
                    xn_Con = new double[subsetSize];
                    //pointers = new int[subsetSize];
                    //cnt1 = 0; cnt2 = 0; cnt3 = 0;

                    for (int j = 0; j < subsetSize; j++)
                    {
                        xn[j] = rnd.Next(0, 2);
                    }

                    //Training.InstanceMask_Binary(prob, subsetSize, pointers, out xn);
                    ObjectInstanceSelection OI = new ObjectInstanceSelection(xn, xn_Con, pointers, 0.0);
                    attr_values.Add(OI);

                    //for (int j = 0; j < prob.Count; j++)
                    //{
                    //    if (cnt1 < (0.7 * subsetSize) && prob.Y[rNum[j]] == -1) //select 70% positive instance of the subset
                    //    {
                    //        xn[cnt3] = rnd.Next(0, 2);
                    //        pointers[cnt3] = rNum[j];
                    //        k++; cnt1++; cnt3++;
                    //    }
                    //    else if (cnt2 < (0.3 * subsetSize) && prob.Y[rNum[j]] == 1)
                    //    {
                    //        xn[cnt3] = rnd.Next(0, 2);
                    //        pointers[cnt3] = rNum[j];
                    //        k++; cnt2++; cnt3++;
                    //    }
                    //    if (cnt3 >= subsetSize)
                    //        break;
                    //}
                }
            }

            return(attr_values);
        }
Example #31
0
File: Solver.cs Project: wendelad/RecSys
        // Stratified cross validation
        public static void svm_cross_validation(Problem prob, Parameter param, int nr_fold, double[] target)
        {
            Random rand = new Random();
            int i;
            int[] fold_start = new int[nr_fold + 1];
            int l = prob.Count;
            int[] perm = new int[l];

            // stratified cv may not give leave-one-out rate
            // Each class to l folds -> some folds may have zero elements
            if ((param.SvmType == SvmType.C_SVC ||
                param.SvmType == SvmType.NU_SVC) && nr_fold < l)
            {
                int[] tmp_nr_class = new int[1];
                int[][] tmp_label = new int[1][];
                int[][] tmp_start = new int[1][];
                int[][] tmp_count = new int[1][];

                svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);

                int nr_class = tmp_nr_class[0];
                //int[] label = tmp_label[0];
                int[] start = tmp_start[0];
                int[] count = tmp_count[0];

                // random shuffle and then data grouped by fold using the array perm
                int[] fold_count = new int[nr_fold];
                int c;
                int[] index = new int[l];
                for (i = 0; i < l; i++)
                    index[i] = perm[i];
                for (c = 0; c < nr_class; c++)
                    for (i = 0; i < count[c]; i++)
                    {
                        int j = i + (int)(rand.NextDouble() * (count[c] - i));
                        do { int _ = index[start[c] + j]; index[start[c] + j] = index[start[c] + i]; index[start[c] + i] = _; } while (false);
                    }
                for (i = 0; i < nr_fold; i++)
                {
                    fold_count[i] = 0;
                    for (c = 0; c < nr_class; c++)
                        fold_count[i] += (i + 1) * count[c] / nr_fold - i * count[c] / nr_fold;
                }
                fold_start[0] = 0;
                for (i = 1; i <= nr_fold; i++)
                    fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
                for (c = 0; c < nr_class; c++)
                    for (i = 0; i < nr_fold; i++)
                    {
                        int begin = start[c] + i * count[c] / nr_fold;
                        int end = start[c] + (i + 1) * count[c] / nr_fold;
                        for (int j = begin; j < end; j++)
                        {
                            perm[fold_start[i]] = index[j];
                            fold_start[i]++;
                        }
                    }
                fold_start[0] = 0;
                for (i = 1; i <= nr_fold; i++)
                    fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
            }
            else
            {
                for (i = 0; i < l; i++) perm[i] = i;
                for (i = 0; i < l; i++)
                {
                    int j = i + (int)(rand.NextDouble() * (l - i));
                    do { int _ = perm[i]; perm[i] = perm[j]; perm[j] = _; } while (false);
                }
                for (i = 0; i <= nr_fold; i++)
                    fold_start[i] = i * l / nr_fold;
            }

            for (i = 0; i < nr_fold; i++)
            {
                int begin = fold_start[i];
                int end = fold_start[i + 1];
                int j, k;
                Problem subprob = new Problem();

                subprob.Count = l - (end - begin);
                subprob.X = new Node[subprob.Count][];
                subprob.Y = new double[subprob.Count];

                k = 0;
                for (j = 0; j < begin; j++)
                {
                    subprob.X[k] = prob.X[perm[j]];
                    subprob.Y[k] = prob.Y[perm[j]];
                    ++k;
                }
                for (j = end; j < l; j++)
                {
                    subprob.X[k] = prob.X[perm[j]];
                    subprob.Y[k] = prob.Y[perm[j]];
                    ++k;
                }
                Model submodel = svm_train(subprob, param);
                if (param.Probability &&
                   (param.SvmType == SvmType.C_SVC ||
                    param.SvmType == SvmType.NU_SVC))
                {
                    double[] prob_estimates = new double[svm_get_nr_class(submodel)];
                    for (j = begin; j < end; j++)
                        target[perm[j]] = svm_predict_probability(submodel, prob.X[perm[j]], prob_estimates);
                }
                else
                    for (j = begin; j < end; j++)
                        target[perm[j]] = svm_predict(submodel, prob.X[perm[j]]);
            }
        }
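A hedged usage sketch: run 5-fold stratified cross validation and derive the accuracy from the per-instance predictions written into target (prob and param are assumed to be already set up):

            double[] target = new double[prob.Count];
            svm_cross_validation(prob, param, 5, target);
            int correct = 0;
            for (int i = 0; i < prob.Count; i++)
                if (target[i] == prob.Y[i]) ++correct;                            //classification: compare predicted and true labels
            Procedures.info("Cross validation accuracy = " + (100.0 * correct / prob.Count) + "%\n");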
Example #32
0
File: Solver.cs Project: wendelad/RecSys
        private static void solve_one_class(Problem prob, Parameter param,
                        double[] alpha, Solver.SolutionInfo si)
        {
            int l = prob.Count;
            double[] zeros = new double[l];
            sbyte[] ones = new sbyte[l];
            int i;

            int n = (int)(param.Nu * prob.Count);	// # of alpha's at upper bound

            for (i = 0; i < n; i++)
                alpha[i] = 1;
            if (n < prob.Count)
                alpha[n] = param.Nu * prob.Count - n;
            for (i = n + 1; i < l; i++)
                alpha[i] = 0;

            for (i = 0; i < l; i++)
            {
                zeros[i] = 0;
                ones[i] = 1;
            }

            Solver s = new Solver();
            s.Solve(l, new ONE_CLASS_Q(prob, param), zeros, ones, alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);
        }
Example #33
0
File: Solver.cs Project: wendelad/RecSys
 public SVC_Q(Problem prob, Parameter param, sbyte[] y_)
     : base(prob.Count, prob.X, param)
 {
     y = (sbyte[])y_.Clone();
     cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));
     QD = new float[prob.Count];
     for (int i = 0; i < prob.Count; i++)
         QD[i] = (float)KernelFunction(i, i);
 }
Example #34
0
File: Solver.cs Project: wendelad/RecSys
        // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
        // perm, length l, must be allocated before calling this subroutine
        private static void svm_group_classes(Problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm)
        {
            int l = prob.Count;
            int Max_nr_class = 16;
            int nr_class = 0;
            int[] label = new int[Max_nr_class];
            int[] count = new int[Max_nr_class];
            int[] data_label = new int[l];
            int i;

            for (i = 0; i < l; i++)
            {
                int this_label = (int)(prob.Y[i]);
                int j;
                for (j = 0; j < nr_class; j++)
                {
                    if (this_label == label[j])
                    {
                        ++count[j];
                        break;
                    }
                }
                data_label[i] = j;
                if (j == nr_class)
                {
                    if (nr_class == Max_nr_class)
                    {
                        Max_nr_class *= 2;
                        int[] new_data = new int[Max_nr_class];
                        Array.Copy(label, 0, new_data, 0, label.Length);
                        label = new_data;
                        new_data = new int[Max_nr_class];
                        Array.Copy(count, 0, new_data, 0, count.Length);
                        count = new_data;
                    }
                    label[nr_class] = this_label;
                    count[nr_class] = 1;
                    ++nr_class;
                }
            }

            int[] start = new int[nr_class];
            start[0] = 0;
            for (i = 1; i < nr_class; i++)
                start[i] = start[i - 1] + count[i - 1];
            for (i = 0; i < l; i++)
            {
                perm[start[data_label[i]]] = i;
                ++start[data_label[i]];
            }
            start[0] = 0;
            for (i = 1; i < nr_class; i++)
                start[i] = start[i - 1] + count[i - 1];

            nr_class_ret[0] = nr_class;
            label_ret[0] = label;
            start_ret[0] = start;
            count_ret[0] = count;
        }
Example #35
File: Solver.cs Project: wendelad/RecSys
        //
        // Interface functions
        //
        public static Model svm_train(Problem prob, Parameter param)
        {
            Model model = new Model();
            model.Parameter = param;

            if (param.SvmType == SvmType.ONE_CLASS ||
               param.SvmType == SvmType.EPSILON_SVR ||
               param.SvmType == SvmType.NU_SVR)
            {
                // regression or one-class-svm
                model.NumberOfClasses = 2;
                model.ClassLabels = null;
                model.NumberOfSVPerClass = null;
                model.PairwiseProbabilityA = null; model.PairwiseProbabilityB = null;
                model.SupportVectorCoefficients = new double[1][];

                if (param.Probability &&
                   (param.SvmType == SvmType.EPSILON_SVR ||
                    param.SvmType == SvmType.NU_SVR))
                {
                    model.PairwiseProbabilityA = new double[1];
                    model.PairwiseProbabilityA[0] = svm_svr_probability(prob, param);
                }

                decision_function f = svm_train_one(prob, param, 0, 0);
                model.Rho = new double[1];
                model.Rho[0] = f.rho;

                int nSV = 0;
                int i;
                for (i = 0; i < prob.Count; i++)
                    if (Math.Abs(f.alpha[i]) > 0) ++nSV;
                model.SupportVectorCount = nSV;
                model.SupportVectors = new Node[nSV][];
                model.SupportVectorCoefficients[0] = new double[nSV];
                int j = 0;
                for (i = 0; i < prob.Count; i++)
                    if (Math.Abs(f.alpha[i]) > 0)
                    {
                        model.SupportVectors[j] = prob.X[i];
                        model.SupportVectorCoefficients[0][j] = f.alpha[i];
                        ++j;
                    }
            }
            else
            {
                // classification
                int l = prob.Count;
                int[] tmp_nr_class = new int[1];
                int[][] tmp_label = new int[1][];
                int[][] tmp_start = new int[1][];
                int[][] tmp_count = new int[1][];
                int[] perm = new int[l];

                // group training data of the same class
                svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);
                int nr_class = tmp_nr_class[0];
                int[] label = tmp_label[0];
                int[] start = tmp_start[0];
                int[] count = tmp_count[0];
                Node[][] x = new Node[l][];
                int i;
                for (i = 0; i < l; i++)
                    x[i] = prob.X[perm[i]];

                // calculate weighted C

                double[] weighted_C = new double[nr_class];
                for (i = 0; i < nr_class; i++)
                    weighted_C[i] = param.C;
                foreach (int weightedLabel in param.Weights.Keys)
                {
                    int index = Array.IndexOf<int>(label, weightedLabel);
                    if (index < 0)
                        Console.Error.WriteLine("warning: class label " + weightedLabel + " specified in weight is not found");
                    else weighted_C[index] *= param.Weights[weightedLabel];
                }

                // train k*(k-1)/2 models

                bool[] nonzero = new bool[l];
                for (i = 0; i < l; i++)
                    nonzero[i] = false;
                decision_function[] f = new decision_function[nr_class * (nr_class - 1) / 2];

                double[] probA = null, probB = null;
                if (param.Probability)
                {
                    probA = new double[nr_class * (nr_class - 1) / 2];
                    probB = new double[nr_class * (nr_class - 1) / 2];
                }

                int p = 0;
                for (i = 0; i < nr_class; i++)
                    for (int j = i + 1; j < nr_class; j++)
                    {
                        Problem sub_prob = new Problem();
                        int si = start[i], sj = start[j];
                        int ci = count[i], cj = count[j];
                        sub_prob.Count = ci + cj;
                        sub_prob.X = new Node[sub_prob.Count][];
                        sub_prob.Y = new double[sub_prob.Count];
                        int k;
                        for (k = 0; k < ci; k++)
                        {
                            sub_prob.X[k] = x[si + k];
                            sub_prob.Y[k] = +1;
                        }
                        for (k = 0; k < cj; k++)
                        {
                            sub_prob.X[ci + k] = x[sj + k];
                            sub_prob.Y[ci + k] = -1;
                        }

                        if (param.Probability)
                        {
                            double[] probAB = new double[2];
                            svm_binary_svc_probability(sub_prob, param, weighted_C[i], weighted_C[j], probAB);
                            probA[p] = probAB[0];
                            probB[p] = probAB[1];
                        }

                        f[p] = svm_train_one(sub_prob, param, weighted_C[i], weighted_C[j]);
                        for (k = 0; k < ci; k++)
                            if (!nonzero[si + k] && Math.Abs(f[p].alpha[k]) > 0)
                                nonzero[si + k] = true;
                        for (k = 0; k < cj; k++)
                            if (!nonzero[sj + k] && Math.Abs(f[p].alpha[ci + k]) > 0)
                                nonzero[sj + k] = true;
                        ++p;
                    }

                // build output

                model.NumberOfClasses = nr_class;

                model.ClassLabels = new int[nr_class];
                for (i = 0; i < nr_class; i++)
                    model.ClassLabels[i] = label[i];

                model.Rho = new double[nr_class * (nr_class - 1) / 2];
                for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
                    model.Rho[i] = f[i].rho;

                if (param.Probability)
                {
                    model.PairwiseProbabilityA = new double[nr_class * (nr_class - 1) / 2];
                    model.PairwiseProbabilityB = new double[nr_class * (nr_class - 1) / 2];
                    for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
                    {
                        model.PairwiseProbabilityA[i] = probA[i];
                        model.PairwiseProbabilityB[i] = probB[i];
                    }
                }
                else
                {
                    model.PairwiseProbabilityA = null;
                    model.PairwiseProbabilityB = null;
                }

                int nnz = 0;
                int[] nz_count = new int[nr_class];
                model.NumberOfSVPerClass = new int[nr_class];
                for (i = 0; i < nr_class; i++)
                {
                    int nSV = 0;
                    for (int j = 0; j < count[i]; j++)
                        if (nonzero[start[i] + j])
                        {
                            ++nSV;
                            ++nnz;
                        }
                    model.NumberOfSVPerClass[i] = nSV;
                    nz_count[i] = nSV;
                }

                Procedures.info("Total nSV = " + nnz + "\n");

                model.SupportVectorCount = nnz;
                model.SupportVectors = new Node[nnz][];
                p = 0;
                for (i = 0; i < l; i++)
                    if (nonzero[i]) model.SupportVectors[p++] = x[i];

                int[] nz_start = new int[nr_class];
                nz_start[0] = 0;
                for (i = 1; i < nr_class; i++)
                    nz_start[i] = nz_start[i - 1] + nz_count[i - 1];

                model.SupportVectorCoefficients = new double[nr_class - 1][];
                for (i = 0; i < nr_class - 1; i++)
                    model.SupportVectorCoefficients[i] = new double[nnz];

                p = 0;
                for (i = 0; i < nr_class; i++)
                    for (int j = i + 1; j < nr_class; j++)
                    {
                        // classifier (i,j): coefficients with
                        // i are in sv_coef[j-1][nz_start[i]...],
                        // j are in sv_coef[i][nz_start[j]...]

                        int si = start[i];
                        int sj = start[j];
                        int ci = count[i];
                        int cj = count[j];

                        int q = nz_start[i];
                        int k;
                        for (k = 0; k < ci; k++)
                            if (nonzero[si + k])
                                model.SupportVectorCoefficients[j - 1][q++] = f[p].alpha[k];
                        q = nz_start[j];
                        for (k = 0; k < cj; k++)
                            if (nonzero[sj + k])
                                model.SupportVectorCoefficients[i][q++] = f[p].alpha[ci + k];
                        ++p;
                    }
            }
            return model;
        }
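A minimal calling sketch for svm_train (not part of the original example; the variable trainingProblem is hypothetical, and svm_train/svm_predict are assumed to be static members of the same Procedures class as svm_check_parameter):

// Hypothetical usage sketch: configure, validate, train, predict.
Parameter param = new Parameter();
param.SvmType = SvmType.C_SVC;
param.KernelType = KernelType.RBF;
param.C = 1.0;       // illustrative values, not tuned
param.Gamma = 0.5;

// validate parameters before training
string error = Procedures.svm_check_parameter(trainingProblem, param);
if (error != null)
    throw new Exception(error);

Model model = Procedures.svm_train(trainingProblem, param);
double label = Procedures.svm_predict(model, trainingProblem.X[0]);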
Example #36
File: Solver.cs Project: wendelad/RecSys
        static decision_function svm_train_one(Problem prob, Parameter param, double Cp, double Cn)
        {
            double[] alpha = new double[prob.Count];
            Solver.SolutionInfo si = new Solver.SolutionInfo();
            switch (param.SvmType)
            {
                case SvmType.C_SVC:
                    solve_c_svc(prob, param, alpha, si, Cp, Cn);
                    break;
                case SvmType.NU_SVC:
                    solve_nu_svc(prob, param, alpha, si);
                    break;
                case SvmType.ONE_CLASS:
                    solve_one_class(prob, param, alpha, si);
                    break;
                case SvmType.EPSILON_SVR:
                    solve_epsilon_svr(prob, param, alpha, si);
                    break;
                case SvmType.NU_SVR:
                    solve_nu_svr(prob, param, alpha, si);
                    break;
            }

            Procedures.info("obj = " + si.obj + ", rho = " + si.rho + "\n");

            // output SVs

            int nSV = 0;
            int nBSV = 0;
            for (int i = 0; i < prob.Count; i++)
            {
                if (Math.Abs(alpha[i]) > 0)
                {
                    ++nSV;
                    if (prob.Y[i] > 0)
                    {
                        if (Math.Abs(alpha[i]) >= si.upper_bound_p)
                            ++nBSV;
                    }
                    else
                    {
                        if (Math.Abs(alpha[i]) >= si.upper_bound_n)
                            ++nBSV;
                    }
                }
            }

            Procedures.info("nSV = " + nSV + ", nBSV = " + nBSV + "\n");

            decision_function f = new decision_function();
            f.alpha = alpha;
            f.rho = si.rho;
            return f;
        }
Example #37
        // build a training subproblem for binary classification from the selected instances
        public Problem buildModel(ObjectInstanceSelection firefly, Problem prob)
        {
            int           tNI = firefly.Attribute_Values.Count(); //size of each Instance Mask
            List <double> y = new List <double>();
            List <Node[]> x = new List <Node[]>();
            bool          pos = false, neg = false;

            //building model for each instance in instance mask in each firefly object
            for (int j = 0; j < tNI; j++)
            {
                if (firefly.__Attribute_Values[j] == 1) //if instance is selected, use for classification
                {
                    int p = firefly.__Pointers[j];
                    x.Add(prob.X[p]);
                    y.Add(prob.Y[p]);

                    if (prob.Y[p] == 1)
                    {
                        pos = true;
                    }
                    else if (prob.Y[p] == -1)
                    {
                        neg = true;
                    }
                }
                else
                {
                    continue;
                }
            }

            Node[][] X = new Node[x.Count][];
            double[] Y = new double[y.Count];

            //ensuring that the subproblem consists of both positive and negative instances
            int k      = 0;
            int countP = y.Count(r => r == 1);  //counting the total number of positive instances in the subproblem
            int countN = y.Count(r => r == -1); //counting the total number of negative instances in the subproblem

            if (pos == false && neg == false)   //if no instances (positive or negative) were selected, return null and skip all computation
            {
                return(null);
            }
            else if (pos == false || countP <= 1)    //if pos == false, then there is no positive instance in the subproblem
            {
                for (int i = 0; i < prob.Count; i++) //too few positive instances: search the whole problem and place two positive instances in the first and second positions of the subproblem
                {
                    if (prob.Y[i] == 1)
                    {
                        x[k] = prob.X[i]; //insert positive instance in the first and second position
                        y[k] = prob.Y[i]; //insert label
                        k++;
                    }
                    if (k == 2)
                    {
                        break;
                    }
                }
            }
            else if (neg == false || countN <= 1) //if neg == false, then there is no negative instance in the subproblem
            {
                k = 0;
                for (int i = 0; i < prob.Count; i++) //too few negative instances: search the whole problem and place two negative instances in the first and second positions of the subproblem
                {
                    if (prob.Y[i] == -1)
                    {
                        x[k] = prob.X[i]; //insert negative instance in the first and second position
                        y[k] = prob.Y[i]; //insert label
                        k++;
                    }
                    if (k == 2)
                    {
                        break;
                    }
                }
            }

            x.CopyTo(X); //convert the lists to arrays
            y.CopyTo(Y);
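            //note: the last constructor argument is the max feature index; using X[0].GetLength(0) assumes dense samples whose node count equals that index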
            Problem subProb = new Problem(X.Count(), Y, X, X[0].GetLength(0));

            return(subProb);
        }
Example #38
File: Solver.cs Project: wendelad/RecSys
 public ONE_CLASS_Q(Problem prob, Parameter param)
     : base(prob.Count, prob.X, param)
 {
     cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));
     QD = new float[prob.Count];
     for (int i = 0; i < prob.Count; i++)
         QD[i] = (float)KernelFunction(i, i);
 }
Example #39
 /// <summary>
 /// Determines the Range transform for the provided problem.
 /// </summary>
 /// <param name="prob">The Problem to analyze</param>
 /// <param name="lowerBound">The lower bound for scaling</param>
 /// <param name="upperBound">The upper bound for scaling</param>
 /// <returns>The Range transform for the problem</returns>
 public static RangeTransform Compute(Problem prob, double lowerBound, double upperBound)
 {
     double[] minVals = new double[prob.MaxIndex];
     double[] maxVals = new double[prob.MaxIndex];
     for (int i = 0; i < prob.MaxIndex; i++)
     {
         minVals[i] = double.MaxValue;
         maxVals[i] = double.MinValue;
     }
     for (int i = 0; i < prob.Count; i++)
     {
         for (int j = 0; j < prob.X[i].Length; j++)
         {
             int index = prob.X[i][j].Index - 1;
             double value = prob.X[i][j].Value;
             minVals[index] = Math.Min(minVals[index], value);
             maxVals[index] = Math.Max(maxVals[index], value);
         }
     }
     for (int i = 0; i < prob.MaxIndex; i++)
     {
         if (minVals[i] == double.MaxValue || maxVals[i] == double.MinValue)
         {
             minVals[i] = 0;
             maxVals[i] = 0;
         }
     }
     return new RangeTransform(minVals, maxVals, lowerBound, upperBound);
 }
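A minimal usage sketch (the variable prob is hypothetical, and the -1/+1 bounds are illustrative, not the library defaults; Transform(value, index) is assumed to scale a single feature value):

// compute per-feature ranges over a loaded Problem, then scale one value
RangeTransform range = RangeTransform.Compute(prob, -1.0, 1.0);
double scaled = range.Transform(prob.X[0][0].Value, prob.X[0][0].Index);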
Example #40
File: Solver.cs Project: wendelad/RecSys
 public SVR_Q(Problem prob, Parameter param)
     : base(prob.Count, prob.X, param)
 {
     l = prob.Count;
     cache = new Cache(l, (long)(param.CacheSize * (1 << 20)));
     QD = new float[2 * l];
     sign = new sbyte[2 * l];
     index = new int[2 * l];
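      // SVR doubles the problem: entries [0, l) are the +1 side and [l, 2l) the -1 side of each
      // training point; index[] maps both halves to the same data row so cached kernel rows are shared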
     for (int k = 0; k < l; k++)
     {
         sign[k] = 1;
         sign[k + l] = -1;
         index[k] = k;
         index[k + l] = k;
         QD[k] = (float)KernelFunction(k, k);
         QD[k + l] = QD[k];
     }
     buffer = new float[2][];
     buffer[0] = new float[2 * l];
     buffer[1] = new float[2 * l];
     next_buffer = 0;
 }
Example #41
 /// <summary>
 /// Determines the Range transform for the provided problem.  Uses the default lower and upper bounds.
 /// </summary>
 /// <param name="prob">The Problem to analyze</param>
 /// <returns>The Range transform for the problem</returns>
 public static RangeTransform Compute(Problem prob)
 {
     return Compute(prob, DEFAULT_LOWER_BOUND, DEFAULT_UPPER_BOUND);
 }
Example #42
File: Solver.cs Project: wendelad/RecSys
        public static string svm_check_parameter(Problem prob, Parameter param)
        {
            // svm_type

            SvmType svm_type = param.SvmType;

            // kernel_type, degree

            //KernelType kernel_type = param.KernelType;

            if (param.Degree < 0)
                return "degree of polynomial kernel < 0";

            // cache_size,eps,C,nu,p,shrinking

            if (param.CacheSize <= 0)
                return "cache_size <= 0";

            if (param.EPS <= 0)
                return "eps <= 0";

            if (param.Gamma == 0)
                param.Gamma = 1.0 / prob.MaxIndex;

            if (svm_type == SvmType.C_SVC ||
               svm_type == SvmType.EPSILON_SVR ||
               svm_type == SvmType.NU_SVR)
                if (param.C <= 0)
                    return "C <= 0";

            if (svm_type == SvmType.NU_SVC ||
               svm_type == SvmType.ONE_CLASS ||
               svm_type == SvmType.NU_SVR)
                if (param.Nu <= 0 || param.Nu > 1)
                    return "nu <= 0 or nu > 1";

            if (svm_type == SvmType.EPSILON_SVR)
                if (param.P < 0)
                    return "p < 0";

            if (param.Probability &&
               svm_type == SvmType.ONE_CLASS)
                return "one-class SVM probability output not supported yet";

            // check whether nu-svc is feasible

            if (svm_type == SvmType.NU_SVC)
            {
                int l = prob.Count;
                int Max_nr_class = 16;
                int nr_class = 0;
                int[] label = new int[Max_nr_class];
                int[] count = new int[Max_nr_class];

                int i;
                for (i = 0; i < l; i++)
                {
                    int this_label = (int)prob.Y[i];
                    int j;
                    for (j = 0; j < nr_class; j++)
                        if (this_label == label[j])
                        {
                            ++count[j];
                            break;
                        }

                    if (j == nr_class)
                    {
                        if (nr_class == Max_nr_class)
                        {
                            Max_nr_class *= 2;
                            int[] new_data = new int[Max_nr_class];
                            Array.Copy(label, 0, new_data, 0, label.Length);
                            label = new_data;

                            new_data = new int[Max_nr_class];
                            Array.Copy(count, 0, new_data, 0, count.Length);
                            count = new_data;
                        }
                        label[nr_class] = this_label;
                        count[nr_class] = 1;
                        ++nr_class;
                    }
                }

                for (i = 0; i < nr_class; i++)
                {
                    int n1 = count[i];
                    for (int j = i + 1; j < nr_class; j++)
                    {
                        int n2 = count[j];
                        if (param.Nu * (n1 + n2) / 2 > Math.Min(n1, n2))
                            return "specified nu is infeasible";
                    }
                }
            }

            return null;
        }
Example #43
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  Use this method when no separate validation data is available; it will
 /// split the training data into 5 folds, training on 4/5 and validating on the remaining 1/5, 5 times.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The set of C values to use</param>
 /// <param name="GammaValues">The set of Gamma values to use</param>
 /// <param name="outputFile">Output file for the parameter results.</param>
 /// <param name="C">The optimal C value will be put into this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
 public static void Grid(
     Problem problem,
     Parameter parameters,
     List<double> CValues,
     List<double> GammaValues,
     string outputFile,
     out double C,
     out double Gamma)
 {
     Grid(problem, parameters, CValues, GammaValues, outputFile, NFOLD, out C, out Gamma);
 }
Example #44
File: Solver.cs Project: wendelad/RecSys
        private static void solve_epsilon_svr(Problem prob, Parameter param, double[] alpha, Solver.SolutionInfo si)
        {
            int l = prob.Count;
            double[] alpha2 = new double[2 * l];
            double[] linear_term = new double[2 * l];
            sbyte[] y = new sbyte[2 * l];
            int i;

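            // variables [0, l) hold alpha (label +1) and [l, 2l) hold alpha* (label -1); the linear term encodes p -/+ y[i]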
            for (i = 0; i < l; i++)
            {
                alpha2[i] = 0;
                linear_term[i] = param.P - prob.Y[i];
                y[i] = 1;

                alpha2[i + l] = 0;
                linear_term[i + l] = param.P + prob.Y[i];
                y[i + l] = -1;
            }

            Solver s = new Solver();
            s.Solve(2 * l, new SVR_Q(prob, param), linear_term, y, alpha2, param.C, param.C, param.EPS, si, param.Shrinking);

            double sum_alpha = 0;
            for (i = 0; i < l; i++)
            {
                alpha[i] = alpha2[i] - alpha2[i + l];
                sum_alpha += Math.Abs(alpha[i]);
            }
            Procedures.info("nu = " + sum_alpha / (param.C * l) + "\n");
        }
Example #45
 /// <summary>
 /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
 /// combination which performed best.  Use this method when validation data isn't available; it will repeatedly
 /// divide the training data, training on one portion and testing on the rest.
 /// </summary>
 /// <param name="problem">The training data</param>
 /// <param name="parameters">The parameters to use when optimizing</param>
 /// <param name="CValues">The set of C values to use</param>
 /// <param name="GammaValues">The set of Gamma values to use</param>
 /// <param name="outputFile">Output file for the parameter results.</param>
 /// <param name="nrfold">The number of times the data should be divided for validation</param>
 /// <param name="C">The optimal C value will be placed in this variable</param>
 /// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
 public static void Grid(
     Problem problem,
     Parameter parameters,
     List<double> CValues, 
     List<double> GammaValues, 
     string outputFile,
     int nrfold,
     out double C,
     out double Gamma)
 {
     C = 0;
     Gamma = 0;
     double crossValidation = double.MinValue;
     StreamWriter output = null;
     if(outputFile != null)
         output = new StreamWriter(outputFile);
     for(int i=0; i<CValues.Count; i++)
         for (int j = 0; j < GammaValues.Count; j++)
         {
             parameters.C = CValues[i];
             parameters.Gamma = GammaValues[j];
             double test = Training.PerformCrossValidation(problem, parameters, nrfold);
             if(output != null)
                 output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
             if (test > crossValidation)
             {
                 C = parameters.C;
                 Gamma = parameters.Gamma;
                 crossValidation = test;
             }
         }
     if(output != null)
         output.Close();
 }
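A minimal usage sketch (the value grids are illustrative, not library defaults):

// exhaustive search over C and Gamma with 5-fold cross validation
List<double> cValues = new List<double> { 0.1, 1.0, 10.0, 100.0 };
List<double> gammaValues = new List<double> { 0.01, 0.1, 1.0 };
double bestC, bestGamma;
ParameterSelection.Grid(problem, parameters, cValues, gammaValues,
    null, 5, out bestC, out bestGamma);   // null: skip the results file
parameters.C = bestC;
parameters.Gamma = bestGamma;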
Example #46
File: Solver.cs Project: wendelad/RecSys
        private static void solve_nu_svr(Problem prob, Parameter param,
                        double[] alpha, Solver.SolutionInfo si)
        {
            int l = prob.Count;
            double C = param.C;
            double[] alpha2 = new double[2 * l];
            double[] linear_term = new double[2 * l];
            sbyte[] y = new sbyte[2 * l];
            int i;

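            // distribute the total mass C*nu*l/2 greedily across alpha2, capping each entry at C (per the nu-SVR constraints)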
            double sum = C * param.Nu * l / 2;
            for (i = 0; i < l; i++)
            {
                alpha2[i] = alpha2[i + l] = Math.Min(sum, C);
                sum -= alpha2[i];

                linear_term[i] = -prob.Y[i];
                y[i] = 1;

                linear_term[i + l] = prob.Y[i];
                y[i + l] = -1;
            }

            Solver_NU s = new Solver_NU();
            s.Solve(2 * l, new SVR_Q(prob, param), linear_term, y, alpha2, C, C, param.EPS, si, param.Shrinking);

            Procedures.info("epsilon = " + (-si.r) + "\n");

            for (i = 0; i < l; i++)
                alpha[i] = alpha2[i] - alpha2[i + l];
        }
Example #47
        /// <summary>
        /// Evaluate Objective Function
        /// </summary>
        //public double[] EvaluateObjectiveFunction(List<ObjectInstanceSelection> fireflies, List<double> accuracy, Problem prob)
        public double[] EvaluateObjectiveFunction(List <ObjectInstanceSelection> Cuckoos, Problem prob)
        {
            int NF  = Cuckoos.Count;                                 //NF -> number of cuckoos (candidate solutions)
            int tNI = Cuckoos.ElementAt(0).Attribute_Values.Count(); //size of each instance mask

            double[] fitness = new double[NF];
            int      sum;

            List <double> classes = fi.getClassLabels(prob.Y); //get the class labels
            int           nClass  = classes.Count;

            List <double> y = new List <double>();
            List <Node[]> x = new List <Node[]>();

            double C, Gamma;

            for (int i = 0; i < NF; i++)
            {
                //building a model from the instances selected in each cuckoo's instance mask
                Problem subProb = fi.buildModel(Cuckoos.ElementAt(i), prob);

                Parameter param = new Parameter();
                if (subProb != null)
                {
                    if (nClass == 2)
                    {
                        int countP = subProb.Y.Count(k => k == 1);  //counting the total number of positive instances in the subproblem
                        int countN = subProb.Y.Count(k => k == -1); //counting the total number of negative instances in the subproblem

                        if (countN <= 1 || countP <= 1)             //ensuring that there are at least two instances of each class in the subproblem
                        {
                            int m = 0;
                            if (countN <= 1)
                            {
                                for (int k = 0; k < prob.Count; k++) //too few negative instances: search the whole problem and place two negative instances in the first and second positions of the subproblem
                                {
                                    if (prob.Y[k] == -1)
                                    {
                                        subProb.X[m] = prob.X[k]; //insert negative instance in the first and second position
                                        subProb.Y[m] = prob.Y[k]; //insert label
                                        m++;
                                    }
                                    if (m == 2)
                                    {
                                        break;
                                    }
                                }
                            }
                            else if (countP <= 1)
                            {
                                for (int k = 0; k < prob.Count; k++) //too few positive instances: search the whole problem and place two positive instances in the first and second positions of the subproblem
                                {
                                    if (prob.Y[k] == 1)
                                    {
                                        subProb.X[m] = prob.X[k]; //insert positive instance in the first and second position
                                        subProb.Y[m] = prob.Y[k]; //insert label
                                        m++;
                                    }
                                    if (m == 2)
                                    {
                                        break;
                                    }
                                }
                            }
                        }
                    }

                    Problem subP = Training.ClusteringBoundaryInstance(subProb);
                    //int subProbCount = subP.Count; //number of selected boundary instances
                    int count = Cuckoos.ElementAt(i).__Attribute_Values.Count(q => q == 1); //total number of selected instances, to be used for subsetSize
                    //double percentageReduction = 100 * (tNI - count) / tNI; //calculating percentage reduction for each instance Mask
                    //double perRedBnstances = (double)(subProb.Count - subProbCount) / subProb.Count; //percentage reduction for boundary instances
                    //double perRedBInstances = (double)(subProb.Count - subP.Count) * subP.Count; //percentage reduction for boundary instances
                    double perRedBInstances      = (double)subProb.Count / subP.Count; //ratio of original to selected boundary instances (cast before dividing to avoid integer truncation)
                    double perRedCuckooInstances = (double)(tNI - count) / tNI;          //percentage reduction for cuckoo instances

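                    //fitness combines two reduction terms: the cuckoo-mask reduction (weighted by 100) plus the boundary-instance ratio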
                    //fitness[i] = perRedCuckooInstances * 100;
                    fitness[i] = (100 * perRedCuckooInstances) + perRedBInstances;
                    //fitness[i] = 100 * ((double)count / (double)tNI);
                    //fitness[i] = 100 * perRedBnstances;
                    //fitness[i] = 100 * (perRedBnstances + perRedCuckooInstances);
                    //fitness[i] = (W_SeelctedBoundaryInstances * subProbCount) + (W_Instances * ((tNI - count) / tNI));
                    //fitness[i] = percentageReduction;
                }
            }

            return(fitness);
        }