Example #1
        public void C_SVC_Should_predict_perfectly_XOR_dataset_with_polynomial_kernel()
        {
            // note: the K(u, v) = (u · v + 1)^2 kernel is able to fit the XOR function exactly
            // see http://www.doc.ic.ac.uk/~dfg/ProbabilisticInference/IDAPILecture18.pdf for more info
            var svm = new C_SVC(xor_problem, KernelHelper.PolynomialKernel(2, 1, 1), 1);

            checkXOR(svm);
        }
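The xor_problem fixture and the checkXOR helper are referenced above but not shown. A minimal sketch of what they could look like, built only from ProblemHelper.ReadProblem and C_SVC.Predict as the later examples use them; the field name, the label-first row layout, and the svm_problem type are assumptions, not part of the original test class:

        // Hypothetical reconstruction of the fixtures used by the tests above.
        // Each row is label first (as libsvm expects), then the two XOR inputs.
        private static readonly svm_problem xor_problem = ProblemHelper.ReadProblem(new List<List<double>>
        {
            new List<double> { 0, 0, 0 },
            new List<double> { 1, 0, 1 },
            new List<double> { 1, 1, 0 },
            new List<double> { 0, 1, 1 },
        });

        private static void checkXOR(C_SVC svm)
        {
            // a perfect fit must reproduce every training point exactly
            for (int i = 0; i < xor_problem.l; i++)
            {
                double predicted = svm.Predict(xor_problem.x[i]);
                Assert.AreEqual(xor_problem.y[i], predicted);
            }
        }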
Example #2
        public void C_SVC_should_enable_to_export_and_import_svm_models()
        {
            // note: the K(u, v) = (u · v + 1)^2 kernel is able to fit the XOR function exactly
            // see http://www.doc.ic.ac.uk/~dfg/ProbabilisticInference/IDAPILecture18.pdf for more info
            var svm       = new C_SVC(xor_problem, KernelHelper.PolynomialKernel(2, 1, 1), 1);
            var file_name = System.IO.Path.Combine(base_path, "test_export_temp.xml");

            // make sure no file is left over from a previous run
            if (File.Exists(file_name))
            {
                File.Delete(file_name);
            }

            svm.Export(file_name);

            Assert.IsTrue(File.Exists(file_name));

            var new_svm = new C_SVC(file_name);

            checkXOR(new_svm);

            File.Delete(file_name); // cleanup
        }
Example #3
        static void Main(string[] args)
        {
            bool   kernelparam     = false;
            bool   properformat    = false;
            bool   needsFormatting = false;
            bool   done            = false;
            int    vectorlength; // number of features
            int    kernelchoice; // integer representation of selected kernel
            int    numberofArgs = args.Length;
            string inputmatrix = string.Empty, savefilename, labelfile; // initialize inputmatrix so every path below sees an assigned value
            string path = Directory.GetCurrentDirectory();
            string save_model_name;
            string kerneltype;
            string testfile;

            /* SVM specific initializations
             */
            int degree = 3; // default for none specified
            int r      = 1;
            // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
            double C     = 2.0;
            double gamma = 0.001953125; // used for Radial Basis Function Kernel (RBF)
            C_SVC  svm;                 // setup the default variable for the SVM

            if (numberofArgs < 1)
            {
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
            } // Exit if no params passed on the command line

            /* We have at least one command line parameter, so we can continue, but a single
             * parameter can't be an int (that would be a kernel choice with no input file),
             * so check for that next.
             */
            if (numberofArgs == 1 && Int32.TryParse(args[0], out kernelchoice))
            {
                Console.WriteLine(MyStrings.usage); // a single parameter can't be an int
                System.Environment.Exit(1);
            }
            else // Assume it is a file name and check whether it needs formatting; if not, we are good to train and save the model
            {
                kernelparam     = false;
                properformat    = HelperFunctions.CheckFormat(args[0]);
                inputmatrix     = args[0];
                savefilename    = inputmatrix.Replace(".mat", ".svm"); // update the suffix
                svm             = new C_SVC(savefilename, KernelHelper.LinearKernel(), C);
                save_model_name = savefilename.Replace(".svm", ".model");
                svm.Export(save_model_name);
                done = true;
            }

            if (numberofArgs >= 1)
            {
                if (Int32.TryParse(args[0], out kernelchoice))
                {
                    kernelparam = true;

                    switch (numberofArgs)
                    {
                    case 2:
                        needsFormatting = HelperFunctions.CheckFormat(args[1]);
                        inputmatrix     = args[1];
                        if (needsFormatting)
                        {
                            Console.WriteLine("Missing label file");
                            System.Environment.Exit(1);
                        }
                        break;

                    case 3:
                        needsFormatting = HelperFunctions.CheckFormat(args[1]);
                        inputmatrix     = args[1];
                        labelfile       = args[2];
                        break;

                    case 4:
                        needsFormatting = HelperFunctions.CheckFormat(args[1]);
                        inputmatrix     = args[1];
                        labelfile       = args[2];
                        testfile        = args[3];
                        break;

                    default:

                        Console.WriteLine("too many parameters");
                        Console.WriteLine(MyStrings.usage);
                        System.Environment.Exit(1);
                        break;
                    }
                }
            }
            savefilename = inputmatrix.Replace(".mat", ".svm"); // update the suffix
            if (!done && needsFormatting && args.Length >= 3) // args[1] and args[2] are both read below
            {
                inputmatrix  = args[1];
                labelfile    = args[2];
                vectorlength = HelperFunctions.VectorLength(inputmatrix);            // Get the number of features
                string[] labels = new string[HelperFunctions.SampleSize(labelfile)]; // Calculate the number of labels and use to create storage

                /* If the input matrix is not already in the correct format, call the reformat function.
                 * The result is a file written in LIBSVM format; the labels are expected to be
                 * in a separate file.
                 *
                 * Reformatdata(string inputmatrix, string[] labels, string savefilename, int vectorlength)
                 *
                 */

                HelperFunctions.Reformatdata(inputmatrix, labels, savefilename, vectorlength);
            }


            // Train the SVM

            /* "." means every 1,000 iterations (or every #data iterations is your #data is less than 1,000).
             *  "*" means that after iterations of using a smaller shrunk problem, we reset to use the whole set. */
            /*  optimization finished, #iter = 219
             *  nu = 0.431030
             *  obj = -100.877286, rho = 0.424632
             *  nSV = 132, nBSV = 107
             *  Total nSV = 132
             *  obj is the optimal objective value of the dual SVM problem. rho is the bias term in the decision
             *  function sgn(w^Tx - rho). nSV and nBSV are number of support vectors and bounded support vectors
             *  (i.e., alpha_i = C). nu-svm is a somewhat equivalent form of C-SVM where C is replaced by nu.
             *  nu simply shows the corresponding parameter.
             */

            /* if a kernel is specified on the command line, then select the corresponding kernel for training the SVM as follows
             * 0 = linear
             * 1 = polynomial
             * 2 = RBF
             * 3 = sigmoid
             * 4 = precomputed
             */

            // 7/23/19 fix up save file name; kernelchoice does not seem to be in the right place, and the logic flow through the switch and if statements above needs some review

            Int32.TryParse(args[0], out kernelchoice);


            if (kernelparam)
            {
                int caseSwitch = kernelchoice;
                switch (caseSwitch)
                {
                case 0:
                    svm        = new C_SVC(savefilename, KernelHelper.LinearKernel(), C);
                    kerneltype = "Linear";
                    break;

                case 1:
                    svm        = new C_SVC(savefilename, KernelHelper.PolynomialKernel(degree, gamma, r), C);
                    kerneltype = "Polynomial";
                    break;

                case 2:
                    svm        = new C_SVC(savefilename, KernelHelper.RadialBasisFunctionKernel(gamma), C);
                    kerneltype = "RBF";
                    break;

                default:
                    svm        = new C_SVC(savefilename, KernelHelper.LinearKernel(), C);
                    kerneltype = "Linear";
                    break;
                }
            }
            else
            {
                svm        = new C_SVC(savefilename, KernelHelper.LinearKernel(), C);
                kerneltype = "Linear";
            }

            // For an RBF kernel use KernelHelper.RadialBasisFunctionKernel(gamma); a linear kernel would be KernelHelper.LinearKernel()
            //
            // var accuracy = svm.GetCrossValidationAccuracy(5);
            save_model_name = savefilename.Replace(".svm", ".model");
            svm.Export(save_model_name);

            /*
             * ********** Stopped here for checking file input formats
             */


            //double accuracy = svm.Predict(testfile);
            //Console.WriteLine(MyStrings.Accuracy, accuracy * 100);
            Console.WriteLine("SVM kernel type {0}", kerneltype);
        }
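The comments above credit grid.py for C = 2.0 and gamma = 0.001953125. A rough in-process equivalent can be sketched with GetCrossValidationAccuracy, which these examples reference (commented out); the GridSearch name, the training-file argument, and the search ranges below are illustrative assumptions, not part of the original program:

        // Hedged sketch: coarse grid search over C and gamma for an RBF kernel,
        // scored by 5-fold cross-validation on a training file in LIBSVM format.
        static Tuple<double, double> GridSearch(string trainingFile)
        {
            double bestC = 1, bestGamma = 1, bestAccuracy = double.MinValue;

            for (int i = -5; i <= 15; i += 2)       // C = 2^-5 ... 2^15
            {
                for (int j = -15; j <= 3; j += 2)   // gamma = 2^-15 ... 2^3
                {
                    double C     = Math.Pow(2, i);
                    double gamma = Math.Pow(2, j);
                    var    svm   = new C_SVC(trainingFile, KernelHelper.RadialBasisFunctionKernel(gamma), C);
                    double acc   = svm.GetCrossValidationAccuracy(5);

                    if (acc > bestAccuracy)
                    {
                        bestAccuracy = acc;
                        bestC        = C;
                        bestGamma    = gamma;
                    }
                }
            }
            return Tuple.Create(bestC, bestGamma);
        }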
Example #4
        static void Main(string[] args)
        {
            bool   kernelparam  = false;
            int    numberofArgs = args.Length;
            string inputmatrix;
            string path = Directory.GetCurrentDirectory();
            string save_model_name;
            string kerneltype;
            string testfile;

            /* SVM specific initializations
             */
            int degree = 3; // default for none specified
            int r      = 1;
            // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
            double C     = 2.0;
            double gamma = 0.001953125; // used for Radial Basis Function Kernel (RBF)
            C_SVC  svm;                 // setup the default variable for the SVM

            /*
             * Three parameters are required, kernel selection, training file and test file
             */

            if (args.Length != 3)
            {
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
            }

            if ((kernelparam = Int32.TryParse(args[0], out int kernelchoice)) && kernelchoice >= 0 && kernelchoice <= 3)
            {
                // Legal values for kernelchoice are 0-3
            }
            else
            {
                // Not a legal kernel selection
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
            }
            inputmatrix = args[1];
            testfile    = args[2];
            if (!HelperFunctions.CheckFormat(inputmatrix))
            {
                Console.WriteLine(MyStrings.TrainingFileFormat, inputmatrix);
                System.Environment.Exit(1);
            }
            if (!File.Exists(testfile))
            {
                Console.WriteLine(MyStrings.File_error, testfile);
                System.Environment.Exit(1);
            }

            // Train the SVM

            switch (kernelchoice)
            {
            case 0:
                svm        = new C_SVC(inputmatrix, KernelHelper.LinearKernel(), C);
                kerneltype = MyStrings.Linear;
                break;

            case 1:
                svm        = new C_SVC(inputmatrix, KernelHelper.PolynomialKernel(degree, gamma, r), C);
                kerneltype = MyStrings.Polynomial;
                break;

            case 2:
                svm        = new C_SVC(inputmatrix, KernelHelper.RadialBasisFunctionKernel(gamma), C);
                kerneltype = MyStrings.RBF;
                break;

            case 3:
                svm        = new C_SVC(inputmatrix, KernelHelper.SigmoidKernel(gamma, r), C);
                kerneltype = MyStrings.Sigmoid;
                break;

            default:
                svm        = new C_SVC(inputmatrix, KernelHelper.LinearKernel(), C);
                kerneltype = MyStrings.Linear;
                break;
            }

            // var accuracy = svm.GetCrossValidationAccuracy(5);
            save_model_name = String.Concat(inputmatrix, ".model");
            svm.Export(save_model_name);
            double result   = HelperFunctions.PredictTestSet(testfile, svm);

            Console.WriteLine(MyStrings.Accuracy, Math.Round(result * 100, 2));
            Console.Write("SVM kernel type {0}      ", kerneltype);
            Console.WriteLine(MyStrings.Parameters, C, gamma, degree, r);
        }
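HelperFunctions.PredictTestSet is called above but not included in the snippet. A plausible sketch of what it might do, assembled from ProblemHelper.ReadProblem, test.x, test.y and svm.Predict exactly as Example #5 uses them; the method name comes from the call site, and the 0-1 return convention (the caller multiplies by 100) is an assumption:

        // Hedged sketch: fraction of test vectors whose predicted label matches the true label.
        static double PredictTestSet(string testfile, C_SVC svm)
        {
            var test    = ProblemHelper.ReadProblem(testfile); // test set in LIBSVM format
            int correct = 0;

            for (int i = 0; i < test.l; i++)
            {
                var x = test.x[i]; // feature vector
                var y = test.y[i]; // true label

                if (svm.Predict(x) == y)
                {
                    correct++;
                }
            }
            return (double)correct / test.l;
        }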
Example #5
        static void Main(string[] args)
        {
            List <double[]> continuousTrainData = DataWrangler.LoadContinuousDataAsync(TrainingCsv, _indexToIgnore).Result;
            List <double[]> continuousTestData  = DataWrangler.LoadContinuousDataAsync(TestingCsv, _indexToIgnore).Result;

            // Print continuous columns for calculating elbows in an external tool (https://bl.ocks.org/rpgove/0060ff3b656618e9136b)
            foreach (int i in _continuousIndexes)
            {
                using (StreamWriter sw = new StreamWriter($"{i}.txt"))
                {
                    sw.WriteLine(string.Join(",", continuousTrainData.Select(array => array[i])));
                }
            }

            // Convert continuous to discrete
            Dictionary <int, GaussianClusterCollection> indexClusterMapping = DataWrangler.GetIndexClustersMap(continuousTrainData, _indexElbowMap);
            List <int[]> discreteTrainData = DataWrangler.ConvertContinuesToDiscrete(continuousTrainData, indexClusterMapping);
            List <int[]> discreteTestData  = DataWrangler.ConvertContinuesToDiscrete(continuousTestData, indexClusterMapping);

            var problem = ProblemHelper.ReadProblem(discreteTrainData.Select(arr =>
            {
                // Move class to front as it is expected by libsvm.
                int temp = arr[0];
                arr[SVMSupportedClassIndex] = arr[OriginalClassIndex];
                arr[OriginalClassIndex]     = temp;
                return(arr.Select(i => (double)i).ToList());
            }).ToList());

            var test = ProblemHelper.ReadProblem(discreteTestData.Select(arr =>
            {
                // Move class to front as it is expected by libsvm.
                int temp = arr[0];
                arr[SVMSupportedClassIndex] = arr[OriginalClassIndex];
                arr[OriginalClassIndex]     = temp;
                return(arr.Select(i => (double)i).ToList());
            }).ToList());

            // defaults taken from documentation http://weka.sourceforge.net/doc.stable/weka/classifiers/functions/LibSVM.html
            double c      = 1;               // default C is 1
            double gamma  = 1.0 / problem.l; // default gamma is 1/k
            double r      = 0;               // default coef0 is 0
            int    degree = 3;               // default degree is 3

            Dictionary <string, Kernel> nameKernelMap = new Dictionary <string, Kernel>(StringComparer.OrdinalIgnoreCase)
            {
                { "Linear", KernelHelper.LinearKernel() },
                { "Polynomial", KernelHelper.PolynomialKernel(degree, gamma, r) },
                { "Radial", KernelHelper.RadialBasisFunctionKernel(gamma) },
                { "Sigmoid", KernelHelper.SigmoidKernel(gamma, r) },
            };

            // Get accuracies for base comparison
            // DON'T DO PARALLEL. We don't know if the underlying implementation is MT safe or not.
            //Parallel.ForEach(nameKernelMap.Keys, (kernelName) =>
            foreach (string kernelName in nameKernelMap.Keys)
            {
                Console.WriteLine($"{kernelName}: {GetSVMAccuracy(problem, test, nameKernelMap[kernelName], c)}");
            }

            // Get accuracy with Naive Bayes
            double[]             classWeightPrior      = new[] { 1.0, 1.0 };
            double[]             classPriorProbability = new[] { 0.5, 0.5 };
            NaiveBayesClassifier naiveBayes            = NaiveBayesClassifier.Load(discreteTrainData, SVMSupportedClassIndex, classWeightPrior, classPriorProbability);

            Console.WriteLine($"Naive Bayes: {naiveBayes.GetPredictionAccuracy(discreteTestData, SVMSupportedClassIndex)}");

            // Calculate SVMs Bias and Variance
            List <List <int[]> > samples = Sampler.SampleData(discreteTrainData, BiasVarianceNumOfSamples);

            ConcurrentDictionary <string, ConcurrentDictionary <int, ConcurrentDictionary <int, int> > > kernelInstanceClassifierPredictionsMappings = new ConcurrentDictionary <string, ConcurrentDictionary <int, ConcurrentDictionary <int, int> > >(StringComparer.OrdinalIgnoreCase);

            foreach (string kernelName in nameKernelMap.Keys)
            {
                ConcurrentDictionary <int, ConcurrentDictionary <int, int> > instanceClassifierPredictionMappings = kernelInstanceClassifierPredictionsMappings.GetOrAdd(kernelName, new ConcurrentDictionary <int, ConcurrentDictionary <int, int> >());
                for (int classifierIndex = 0; classifierIndex < BiasVarianceNumOfSamples; classifierIndex++)
                {
                    problem = ProblemHelper.ReadProblem(samples[classifierIndex].Select(arr => arr.Select(i => (double)i).ToList()).ToList());

                    var svm = new C_SVC(problem, nameKernelMap[kernelName], c);

                    for (int instanceIndex = 0; instanceIndex < discreteTestData.Count; instanceIndex++)
                    {
                        ConcurrentDictionary <int, int> classifierPredictionMappings = instanceClassifierPredictionMappings.GetOrAdd(instanceIndex, new ConcurrentDictionary <int, int>());
                        test = ProblemHelper.ReadProblem(new List <List <double> > {
                            discreteTestData[instanceIndex].Select(i => (double)i).ToList()
                        });

                        for (int i = 0; i < test.l; i++)
                        {
                            var x = test.x[i];
                            var y = test.y[i];
                            classifierPredictionMappings.GetOrAdd(classifierIndex, (int)svm.Predict(x));
                        }
                    }
                }
            }

            Console.WriteLine("Kernel, Bias, Variance, Accuracy");
            foreach (string kernelName in nameKernelMap.Keys)
            {
                ConcurrentDictionary <int, ConcurrentDictionary <int, int> > instanceClassifierPredictionMappings = kernelInstanceClassifierPredictionsMappings.GetOrAdd(kernelName, new ConcurrentDictionary <int, ConcurrentDictionary <int, int> >());
                Tuple <double, double, double> biasVarianceAccuracy = BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, SVMSupportedClassIndex, instanceClassifierPredictionMappings);
                Console.WriteLine($"{kernelName}, {biasVarianceAccuracy.Item1}, {biasVarianceAccuracy.Item2}, {biasVarianceAccuracy.Item3}");
            }

            Console.WriteLine("Press ENTER to continue...");
            Console.ReadLine();
        }
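GetSVMAccuracy, used in the kernel-comparison loop above, is also not part of the snippet. A minimal sketch under the assumption that it trains a C_SVC on the given problem and scores the test problem; the svm_problem parameter type is an assumption, while Kernel, C_SVC and Predict all appear in the code above:

        // Hedged sketch of the helper called in the kernel-comparison loop: train on "problem"
        // with the given kernel and C, then return the fraction of "test" predicted correctly.
        static double GetSVMAccuracy(svm_problem problem, svm_problem test, Kernel kernel, double c)
        {
            var svm     = new C_SVC(problem, kernel, c);
            int correct = 0;

            for (int i = 0; i < test.l; i++)
            {
                if (svm.Predict(test.x[i]) == test.y[i])
                {
                    correct++;
                }
            }
            return (double)correct / test.l;
        }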