Example #1
        public void Should_be_able_to_read_valid_dataset()
        {
            // Dataset extracted from the first five rows of the mpg.ds file.
            var dataset = new List <List <double> >
            {
                new List <double> {
                    18, 8, 307.0, 130.0, 3504.0, 12.0, 70, 1
                },
                new List <double> {
                    15, 8, 350.0, 165.0, 3693.0, 11.5, 70, 1
                },
                new List <double> {
                    18, 8, 318.0, 150.0, 3436.0, 11.0, 70, 1
                },
                new List <double> {
                    16, 8, 304.0, 150.0, 3433.0, 12.0, 70, 1
                },
                new List <double> {
                    17, 8, 302.0, 140.0, 3449.0, 10.5, 70, 1
                }
            };

            svm_problem prob = ProblemHelper.ReadAndScaleProblem(dataset);

            // Data count
            int dataCount = dataset.Count();

            Assert.AreEqual(prob.l, dataCount, 0.01);
            Assert.AreEqual(prob.x.Count(), dataCount, 0.01);
            Assert.AreEqual(prob.y.Count(), dataCount, 0.01);

            // Scale
            Assert.AreEqual(prob.x.Max(v => v.Max(n => n.value)), 1.0, 0.1);
            Assert.AreEqual(prob.x.Min(v => v.Max(n => n.value)), 0.0, 0.1);
        }
Example #2
        static public double PredictTestSet(string inputfile, C_SVC svm)
        {
            /* Given a test set "inputfile" and a previously trained SVM, calculates the accuracy of
             * the trained SVM. The function returns the fraction of correct predictions.
             */

            int    i;
            double total         = 0; // count of correct predictions
            var    predfile      = ProblemHelper.ReadProblem(inputfile); // Reads in the SVM format file and results in a svm_problem format
            double expectedValue = 0;

            for (i = 0; i < predfile.l; i++)
            {
                var x = predfile.x[i];                  // x is the ith vector sample
                expectedValue = predfile.y[i];
                var predictedValue = svm.Predict(x);    // Make label prediction
                if (predictedValue == expectedValue)    // Compare the prediction with actual
                {
                    total++;
                }
            }
            double result = ((double)total / (double)i);    // Calculate the accuracy and return

            return(result);
        }
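A minimal usage sketch for the helper above, assuming a previously trained C_SVC (the constructor shown in Example #21 below) and hypothetical libsvm-format file names:

            // Hypothetical file names and C value, for illustration only.
            var svm = new C_SVC("train.libsvm", KernelHelper.LinearKernel(), 1.0);
            double fractionCorrect = PredictTestSet("test.libsvm", svm);
            Console.WriteLine("Accuracy: {0}%", Math.Round(fractionCorrect * 100, 2));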
Example #3
        static void Main(string[] args)
        {
            var    path    = Environment.CurrentDirectory;
            string DvCPath = System.IO.Path.Combine(path, DvC_TEST_FILE);
            string DvHPath = System.IO.Path.Combine(path, DvH_TEST_FILE);
            string HvCPath = System.IO.Path.Combine(path, HvC_TEST_FILE);

            DvC_prob = ProblemHelper.ReadAndScaleProblem(DvCPath);
            DvH_prob = ProblemHelper.ReadAndScaleProblem(DvHPath);
            HvC_prob = ProblemHelper.ReadAndScaleProblem(HvCPath);

            var DvCsvm = new C_SVC(DvC_prob, KernelHelper.RadialBasisFunctionKernel(gamma), C);
            var DvHsvm = new C_SVC(DvH_prob, KernelHelper.RadialBasisFunctionKernel(gamma), C);
            var HvCsvm = new C_SVC(HvC_prob, KernelHelper.RadialBasisFunctionKernel(gamma), C);

            var DvCcva = DvCsvm.GetCrossValidationAccuracy(5);
            var DvHcva = DvHsvm.GetCrossValidationAccuracy(2);
            var HvCcva = HvCsvm.GetCrossValidationAccuracy(5);

            DvCsvm.Export(System.IO.Path.Combine(path, DvC_MODEL_FILE));
            DvHsvm.Export(System.IO.Path.Combine(path, DvH_MODEL_FILE));
            HvCsvm.Export(System.IO.Path.Combine(path, HvC_MODEL_FILE));

            Console.WriteLine(String.Format("--------------------------"));
            Console.WriteLine(String.Format("DvC Result: {0}%", (Math.Round(DvCcva * 100, 2)).ToString()));
            Console.WriteLine(String.Format("DvH Result: {0}%", (Math.Round(DvHcva * 100, 2)).ToString()));
            Console.WriteLine(String.Format("HvC Result: {0}%", (Math.Round(HvCcva * 100, 2)).ToString()));
            Console.WriteLine(String.Format("--------------------------"));

            Console.ReadKey();
        }
        /// <summary>
        /// Test for ReadProblem
        ///</summary>
        //[TestMethod()]
        public void ReadProblemTest()
        {
            string full_path = System.IO.Path.Combine(base_path, TEST_FILE);
            var    prob      = ProblemHelper.ReadProblem(full_path);

            Assert.IsNotNull(prob);
        }
Example #5
        public double buildSVMTestCorpus(string filename)
        {
            double total = 0, tp = 0;
            string trainDataPath = filename + "SimpleTrainSVM.txt";

            if (File.Exists(trainDataPath))
            {
                _test = ProblemHelper.ReadProblem(trainDataPath);
                _test = ProblemHelper.ScaleProblem(_test);
                svm_node[][] sn = _test.x;
                total = sn.Length;
                double[] lbls = _test.y;
                for (int i = 0; i < sn.Length; i++)
                {
                    if (_test.y[i] == svm.Predict(sn[i]))
                    {
                        tp++;
                    }
                }
                fileExistance = true;
                //ProblemHelper.WriteProblem(filename+"TestSVM.txt", _test);
            }
            else
            {
                SVMScale readyData = new SVMScale();
                readyData.buildSVMCorpus(filename);
                readyData.scaleSVMData(filename);
                return buildSVMTestCorpus(filename); // reuse the freshly built corpus
            }
            return((tp / total) * 100);
        }
Example #6
        private void buttonSVM_Click(object sender, EventArgs e)
        {
            List <double> values = new List <double>();

            foreach (var column in checkedListBoxVariableRellenar.SelectedItems)
            {
                for (int i = 0; i < dt.Rows.Count; i++)
                {
                    values.Add(double.Parse(dt.Rows[i][column.ToString()].ToString()));
                }

                var dataTraining = ProblemHelper.ReadAndScaleProblem(new List <List <double> >()
                {
                    values
                });
                var    svm = new Epsilon_SVR(DataProblem, KernelHelper.RadialBasisFunctionKernel(Gamma), C, Elipson);
                double mse = svm.GetMeanSquaredError();

                var prediction = svm.Predict(dataTraining.x[0]);
            }


            // 1. First build a subtable with the attributes to be used,
            // i.e. the ones selected in the checkbox.

            // 2. Choose the column whose missing values should be filled in.

            // 3. Remove the records with missing values in the predictor variables;
            // in this case those are the rows with a value of -200 (see the filtering sketch after this example).

            // 4. Apply the SVM algorithm.

            // 5. Generate a view with the resulting values.

            // 6. Generate a summary of the results: in row X, value A was replaced by value B.
        }
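The numbered steps above outline the intended imputation workflow. As a minimal sketch of the filtering in step 3 (assuming System.Data and System.Linq; the DataTable and predictor column names are hypothetical stand-ins for the form's actual fields):

            // Sketch only: drops rows whose predictor columns carry the -200 missing-value marker.
            private static DataTable RemoveRowsWithMissingPredictors(DataTable dt, IEnumerable<string> predictorColumns)
            {
                DataTable filtered = dt.Clone(); // same schema, no rows
                foreach (DataRow row in dt.Rows)
                {
                    bool hasMissing = predictorColumns.Any(col => double.Parse(row[col].ToString()) == -200);
                    if (!hasMissing)
                    {
                        filtered.ImportRow(row);
                    }
                }
                return filtered;
            }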
Example #7
        public void TestInitialize()
        {
            var path     = Environment.CurrentDirectory;
            var pos      = path.IndexOf("libsvm.net");
            var basePath = path.Substring(0, pos + 10);

            training_prob = ProblemHelper.ReadAndScaleProblem(System.IO.Path.Combine(basePath, TRAINING_FILE));
            test_prob     = ProblemHelper.ReadAndScaleProblem(System.IO.Path.Combine(basePath, TEST_FILE));
        }
        /// <summary>
        /// Test for ScaleProblem
        ///</summary>
        //[TestMethod()]
        public void ScaleProblemTest()
        {
            string full_path = System.IO.Path.Combine(base_path, TEST_FILE);
            var    prob      = ProblemHelper.ScaleProblem(ProblemHelper.ReadProblem(full_path));

            Assert.IsNotNull(prob);
            Assert.IsTrue(prob.x.Max(v => v.Max(n => n.value)) == 1.0);
            Assert.IsTrue(prob.x.Min(v => v.Min(n => n.value)) == -1.0);
        }
Example #9
        public C_SVC_Tests()
        {
            var current_path = Environment.CurrentDirectory;
            var pos          = current_path.IndexOf("libsvm.net");

            base_path = current_path.Substring(0, pos + 10);
            string full_path = System.IO.Path.Combine(base_path, XOR_DATASET);

            xor_problem = ProblemHelper.ReadProblem(full_path);
        }
Example #10
        public static void LibSVM(List <string> inputData, List <string> testData)
        {
            var inputFilePath = @"D:\新西兰学习生活\大学上课\乐谱数据\input.txt";
            var testFilePath  = @"D:\新西兰学习生活\大学上课\乐谱数据\test.txt";

            PrepareDataLibSvm(inputData, inputFilePath);
            PrepareDataLibSvm(testData, testFilePath);

            var _prob = ProblemHelper.ReadAndScaleProblem(inputFilePath);
            var svm   = new C_SVC(_prob, KernelHelper.RadialBasisFunctionKernel(gamma), C);
        }
Example #11
        public void TestInitialize()
        {
            var    path     = Environment.CurrentDirectory;
            var    pos      = path.IndexOf("libsvm.net");
            var    basePath = path.Substring(0, pos + 10);
            string fullPath = System.IO.Path.Combine(basePath, LEU_TEST_FILE);

            // get data from file
            // Note that you should always scale your data
            _prob = ProblemHelper.ReadAndScaleProblem(fullPath);
        }
Example #12
        public string svmRealTimeTest(double[] testData)
        {
            int len = 0;

            for (int i = 0; i < testData.Length; i++)
            {
                if (testData[i] < lowPass)
                {
                    len++;
                }
                else
                {
                    i += escape;
                }
            }
            svm_problem tempProb = _prob;

            tempProb.x[0] = new svm_node[len];
            //testData=scaleData(testData);

            /*List<List<double>> testD = new List<List<double>>();
             * testD.Add(testData);
             * testData = new List<double>();
             * double[] data1 = new double[] { 4, 13.465019915, 221.931854818, 34.448097045, 51.47996222,41.137614759, 15.230779949, 22.01443672, 32.395593998, 21.310546988, 0.988700891, 6.74993337, 5.037963203, 1.074775069, 0.615915165, 0.920866746, 6.755586104, 5.014666624, 2.568192279, 12.08015653, 03.931508695, 500, 500, 500, 1269.375212, 185.55572135 };
             * for (int i = 0; i < data1.Length; i++)
             *  testData.Add(data1[i]);
             * testD.Add(testData);
             */
            for (int i = 0, j = 0; i < len && j < testData.Length; j++)
            {
                if (testData[j] < lowPass)
                {
                    tempProb.x[0][i]       = new svm_node();
                    tempProb.x[0][i].value = testData[j];
                    tempProb.x[0][i].index = j + 1;
                    i++;
                }
                else
                {
                    j += escape;
                }
            }
            if (len > 0)
            {
                tempProb = ProblemHelper.ScaleProblem(tempProb);
            }
            var predictY = svm.Predict(tempProb.x[0]);

            return(predictionDictionary[(int)predictY]);
        }
Example #13
        public bool buildSVMCorpus(string filename)
        {
            string trainDataPath = filename + "TrainSVM.txt";

            if (File.Exists(trainDataPath))
            {
                _prob = ProblemHelper.ReadProblem(trainDataPath);
                _test = ProblemHelper.ScaleProblem(_prob);
                svm   = new C_SVC(_test, KernelHelper.LinearKernel(), C);
                ProblemHelper.WriteProblem(filename + "output.txt", _test);
                fileExistance = true;
            }
            return(fileExistance);
        }
        /// <summary>
        /// Test for WriteProblem
        ///</summary>
        //[TestMethod()]
        public void WriteProblemTest()
        {
            string full_test_path = System.IO.Path.Combine(base_path, TEST_FILE);
            var    prob           = ProblemHelper.ReadProblem(full_test_path);

            string full_write_path = System.IO.Path.Combine(base_path, WRITE_FILE);

            if (File.Exists(full_write_path))
            {
                File.Delete(full_write_path);
            }

            ProblemHelper.WriteProblem(full_write_path, prob);
            Assert.IsTrue(File.Exists(full_write_path));
            File.Delete(full_write_path); // clean up after the test succeeded
        }
Example #15
        public bool buildSVMCorpus(string filename)
        {
            string trainDataPath = filename + "SimpleScaledTrainSVM.txt";

            if (File.Exists(trainDataPath))
            {
                _prob         = ProblemHelper.ReadAndScaleProblem(trainDataPath);
                svm           = new C_SVC(_prob, KernelHelper.LinearKernel(), C);
                fileExistance = true;

                var      reader = new StreamReader(File.OpenRead(filename + "MinMax.txt"));
                string[] minMax = reader.ReadLine().Split(',');
                scale.min = Convert.ToDouble(minMax[0]);
                scale.max = Convert.ToDouble(minMax[1]);
            }

            return(fileExistance);
        }
Example #16
        public void Should_be_able_to_read_files()
        {
            var prob = ProblemHelper.ReadProblem(path_to_xor_dataset);

            Assert.IsNotNull(prob);

            var lineCount = File.ReadLines(path_to_xor_dataset).Count();

            Assert.IsTrue(prob.l == lineCount);
            Assert.IsTrue(prob.x.Count() == lineCount);
            Assert.IsTrue(prob.y.Count() == lineCount);

            Assert.IsTrue(prob.x.Max(v => v.Max(n => n.value)) == 1.0);
            Assert.IsTrue(prob.x.Min(v => v.Min(n => n.value)) == -1.0);

            Assert.IsTrue(prob.y.Max() == 1.0);
            Assert.IsTrue(prob.y.Min() == 0);
        }
Example #17
        public void Should_be_able_to_read_files()
        {
            svm_problem problem = ProblemHelper.ReadProblem(path_to_xor_dataset);

            var expectedProblem = GetExpectedXorProblem();

            Assert.AreEqual(expectedProblem.l, problem.l);
            CollectionAssert.AreEqual(expectedProblem.y, problem.y);

            // We cannot use CollectionAssert so we use this ugly method
            for (var i = 0; i < problem.x.Length; i++)
            {
                for (var j = 0; j < problem.x[i].Length; j++)
                {
                    Assert.AreEqual(expectedProblem.x[i][j].index, problem.x[i][j].index, string.Format("Index values differ on line {0}, element {1}", i + 1, j + 1));
                    Assert.AreEqual(expectedProblem.x[i][j].value, problem.x[i][j].value, string.Format("Values differ on line {0}, element {1}", i + 1, j + 1));
                }
            }
        }
Example #18
        public void XORTest()
        {
            var range      = Enumerable.Range(-10, 16);
            var log2gammas = range.Select(i => Math.Pow(2, i));
            var log2Cs     = range.Select(i => Math.Pow(2, i + 1));
            var log2Rs     = range.Select(i => Math.Pow(2, i + 1));
            var prob       = ProblemHelper.ReadProblem(XOR_TRAINING_FILE);
            //Assert.IsTrue(prob.l == 4);
            Tuple <double, double, double, int> best = Tuple.Create(0.0, 0.0, 0.0, prob.l);

            foreach (var g in log2gammas)
            {
                foreach (var c in log2Cs)
                {
                    foreach (var r in log2Rs)
                    {
                        var svm       = new C_SVC(prob, KernelHelper.SigmoidKernel(g, r), c);
                        var errorCout = 0;
                        for (int i = 0; i < prob.l; i++)
                        {
                            //var x = (prob.x[i].FirstOrDefault(xi => xi.index == 1) == null) ? 0.0 : prob.x[i].FirstOrDefault(xi => xi.index == 1).value;
                            //var y = (prob.x[i].FirstOrDefault(xi => xi.index == 2) == null) ? 0.0 : prob.x[i].FirstOrDefault(xi => xi.index == 2).value;
                            var z             = svm.Predict(prob.x[i]);
                            var probabilities = svm.PredictProbabilities(prob.x[i]);
                            if (z != prob.y[i])
                            {
                                errorCout++;
                            }
                            //Debug.WriteLine(String.Format("x={0} & y={1} => z={2} -- {3}", x, y, z, z == prob.y[i]));
                        }
                        if (errorCout < best.Item4)
                        {
                            best = Tuple.Create(g, c, r, errorCout);
                        }
                        //Debug.WriteLine(String.Format("g={0} && C={1} && C={2} => Error rate = {3}%", g, c, r, (double)errorCout / prob.l * 100));
                    }
                }
            }
            Debug.WriteLine(String.Format("BEST :: g={0} && C={1} && R={2} => Error rate = {3}%", best.Item1, best.Item2, best.Item3, (double)best.Item4 / (double)prob.l * 100));
        }
Example #19
File: Program.cs Project: mlnethub/ML.Net
        private static (double C, double sigma) Dataset3Params(Matrix <double> x, Vector <double> y, Matrix <double> xval, Vector <double> yval)
        {
            double[] c_val      = new [] { 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 3 }; // possible values of C
            double[] sigma_test = new [] { 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 3 }; // possible values of sigma

            // Results:
            //  [:,0] - error
            //  [:,1] - C
            //  [:,2] -  sigma
            Matrix <double> results = Matrix <double> .Build.Dense(c_val.Length *sigma_test.Length, 3);

            // convert x, y in libsvm format
            List <List <double> > libSvmData = ConvertToLibSvmFormat(x, y);

            // try all possible pairs of C and sigma
            int i = 0;

            foreach (double c_temp in c_val)
            {
                foreach (double s_temp in sigma_test)
                {
                    double gamma     = 1 / (2 * s_temp * s_temp);
                    var    rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

                    svm_problem prob = ProblemHelper.ReadProblem(libSvmData);
                    C_SVC       svc  = new C_SVC(prob, rbfKernel, c_temp);

                    double error = ComputeValidationError(svc, xval, yval);

                    results[i, 0] = error;
                    results[i, 1] = c_temp;
                    results[i, 2] = s_temp;
                    i++;
                }
            }

            int idx = results.Column(0).MinimumIndex();

            return(results.Row(idx)[1], results.Row(idx)[2]);
        }
Example #20
        public void Should_be_able_to_detect_invalid_feature_count_in_dataset()
        {
            var dataset = new List <List <double> >
            {
                new List <double> {
                    18, 8, 307.0, 130.0, 3504.0, 12.0, 70, 1
                },
                new List <double> {
                    15, 8, 350.0, 165.0, 3693.0, 11.5, 70, 1
                },
                new List <double> {
                    18, 8, 318.0, 150.0, 11.0, 70, 1
                },                                                   // the 4th feature value is missing from this row.
                new List <double> {
                    16, 8, 304.0, 150.0, 3433.0, 12.0, 70, 1
                },
                new List <double> {
                    17, 8, 302.0, 140.0, 3449.0, 10.5, 70, 1
                }
            };

            ProblemHelper.ReadAndScaleProblem(dataset);
        }
Example #21
 /// <summary>
 ///     Classification SVM
 ///     Supports multi-class classification
 /// </summary>
 /// <param name="input_file_name">Path to the training data set file. Has respect the libsvm format</param>
 /// <param name="kernel">Selected Kernel</param>
 /// <param name="C">Cost parameter </param>
 /// <param name="cache_size">Indicates the maximum memory that the program can use </param>
 /// <param name="probability">Set this parameter to true if you want to use the PredictProbabilities function</param>
 public C_SVC(string input_file_name, Kernel kernel, double C, double cache_size = 100, bool probability = false)
     : this(ProblemHelper.ReadProblem(input_file_name), kernel, C, cache_size, probability)
 {
 }
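This overload simply reads the problem file and forwards to the main constructor. A minimal usage sketch with an illustrative file name and parameter values (not taken from the library's documentation):

     // Hypothetical file and parameters; probability: true enables PredictProbabilities as noted above.
     var svm = new C_SVC("training.libsvm", KernelHelper.RadialBasisFunctionKernel(0.5), C: 1.0, cache_size: 200, probability: true);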
Example #22
File: Program.cs Project: msbobh/SVM_Train
        static void Main(string[] args)
        {
            bool   kernelparam  = false;
            int    numberofArgs = args.Length;
            string inputmatrix;
            string path = Directory.GetCurrentDirectory();
            string save_model_name;
            string kerneltype;
            string testfile;

            /* SVM specific initializations
             */
            int degree = 3; // default for none specified
            int r      = 1;
            // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
            double C     = 2.0;
            double gamma = 0.001953125; // used for Radial Basis Function Kernel (RBF)
            C_SVC  svm;                 // setup the default variable for the SVM

            /*
             * Three parameters are required, kernel selection, training file and test file
             */

            if (args.Length != 3)
            {
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
            }

            if (kernelparam = Int32.TryParse(args[0], out int kernelchoice) && kernelchoice >= 0 && kernelchoice <= 3)
            {
                // Legal values for kernelchoice are 0-3.
                //kernelchoice = 1;
            }
            else
            {
                // Not a legal kernel selection
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
            }
            inputmatrix = args[1];
            testfile    = args[2];
            if (!HelperFunctions.CheckFormat(inputmatrix))
            {
                Console.WriteLine(MyStrings.TrainingFileFormat, inputmatrix);
                System.Environment.Exit(1);
            }
            if (!File.Exists(testfile))
            {
                Console.WriteLine(MyStrings.File_error, testfile);
                System.Environment.Exit(1);
            }

            // Train the SVM

            switch (kernelchoice)
            {
            case 0:
                svm        = new C_SVC(inputmatrix, KernelHelper.LinearKernel(), C);
                kerneltype = MyStrings.Linear;
                break;

            case 1:
                svm        = new C_SVC(inputmatrix, KernelHelper.PolynomialKernel(degree, gamma, r), C);
                kerneltype = MyStrings.Polynomial;
                break;

            case 2:
                svm        = new C_SVC(inputmatrix, KernelHelper.RadialBasisFunctionKernel(gamma), C);
                kerneltype = MyStrings.RBF;
                break;

            case 3:
                svm        = new C_SVC(inputmatrix, KernelHelper.SigmoidKernel(gamma, r), C);
                kerneltype = MyStrings.Sigmoid;
                break;

            default:
                svm        = new C_SVC(inputmatrix, KernelHelper.LinearKernel(), C);
                kerneltype = MyStrings.Linear;
                break;
            }

            // var accuracy = svm.GetCrossValidationAccuracy(5);
            save_model_name = String.Concat(inputmatrix, ".model");
            svm.Export(save_model_name);
            var    predfile = ProblemHelper.ReadProblem(testfile);
            double result   = HelperFunctions.PredictTestSet(testfile, svm);

            Console.WriteLine(MyStrings.Accuracy, Math.Round(result * 100, 2));
            Console.Write("SVM kernel type {0}      ", kerneltype);
            Console.WriteLine(MyStrings.Parameters, C, gamma, degree, r);
        }
Example #23
 public void Should_be_able_to_detect_void_dataset()
 {
     ProblemHelper.ReadAndScaleProblem(new List <List <double> >());
 }
Example #24
 public void Should_be_able_to_detect_null_dataset()
 {
     ProblemHelper.ReadAndScaleProblem((List <List <double> >)null);
 }
Example #25
        static void Main(string[] args)
        {
            if (!System.Console.IsOutputRedirected)
            {
                System.Console.Clear();
            }

            CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

            System.Console.WriteLine("Sentiment Analysis");
            System.Console.WriteLine("======================\n");

            // load data
            System.Console.WriteLine("Loading data....");
            string fileContent = ReadDataFile(".\\data\\wikipedia-detox-250-line-data.tsv");

            // preprocess file
            System.Console.WriteLine("Processing data....");
            string[,] processedComments = ProcessComments(fileContent);
            System.Console.WriteLine($"Data file contains {processedComments.GetLength(0)} comments\n");
            // for(int i = 0; i < 3; i++)
            // {
            //     System.Console.WriteLine($"{processedComments[i, 0]}\t{processedComments[i, 1]}");
            // }
            // System.Console.WriteLine("...\n");

            // generate the vocabulary list
            System.Console.WriteLine("Generating Vocabulary List....");
            string[] vocab = GenerateVocabulary(processedComments);
            System.Console.WriteLine($"Vocabulary generated with {vocab.Length} words\n");

            // get labels from preprocessed comments
            System.Console.WriteLine("Retrieving labels...");
            Vector <double> Y = GetLables(processedComments);

            //System.Console.WriteLine(Y);

            // extract features from processed comments and vocabulary
            System.Console.WriteLine("Extracting features...");
            Matrix <double> X = GetFeatures(processedComments, vocab);

            //System.Console.WriteLine(X);

            // split the data into train and test in ratio 80:20
            System.Console.WriteLine("Splitting data...");
            int m           = X.RowCount;
            int n           = X.ColumnCount;
            int testsetSize = m * 20 / 100;

            Vector <double> testLabel    = Y.SubVector(0, testsetSize);
            Matrix <double> testFeatures = X.SubMatrix(0, testsetSize, 0, n);

            Vector <double> trainingLabel    = Y.SubVector(testsetSize, m - testsetSize);
            Matrix <double> trainingFeatures = X.SubMatrix(testsetSize, m - testsetSize, 0, n);

            System.Console.WriteLine();
            System.Console.WriteLine($"Test set: {testLabel.Count}");
            System.Console.WriteLine($"Training set: {trainingLabel.Count}");

            // training SVM
            System.Console.WriteLine("\nTraining linear SVM ...\n");

            // SVM parameters
            double C            = .4;
            var    linearKernel = KernelHelper.LinearKernel();

            List <List <double> > libSvmData = ConvertToLibSvmFormat(trainingFeatures, trainingLabel);
            svm_problem           prob       = ProblemHelper.ReadProblem(libSvmData);
            var svc = new C_SVC(prob, linearKernel, C);

            System.Console.WriteLine();

            // accuracy on training set
            Vector <double> prediction = SvmPredic(trainingFeatures, svc);
            double          accuracy   = CalculateAccuracy(prediction, trainingLabel);

            System.Console.WriteLine("Training set Accuracy: {0:f2}%\n", accuracy);


            // accuracy on test set
            prediction = SvmPredic(testFeatures, svc);
            accuracy   = CalculateAccuracy(prediction, testLabel);
            System.Console.WriteLine("Test set Accuracy: {0:f2}%\n", accuracy);

            // F1 score
            double f1Score = CalculateF1Score(prediction, testLabel);

            System.Console.WriteLine("F1 Score on test set: {0:f2}%\n", f1Score * 100);

            //Pause();
        }
        void svm()
        {
            var pro = ProblemHelper.ReadProblem("res.txt");

            model = new C_SVC(pro, KernelHelper.RadialBasisFunctionKernel(8), 32.0);
        }
Example #27
File: Program.cs Project: mlnethub/ML.Net
        static void Main(string[] args)
        {
            if (!System.Console.IsOutputRedirected)
            {
                System.Console.Clear();
            }

            CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

            var M = Matrix <double> .Build;
            var V = Vector <double> .Build;


            //// =============== Part 1: Loading and Visualizing Data ================
            //  We start the exercise by first loading and visualizing the dataset.
            //  The following code will load the dataset into your environment and plot
            //  the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data1:
            // You will have X, y in your environment
            Dictionary <string, Matrix <double> > ms = MatlabReader.ReadAll <double>("data\\ex6data1.mat");

            Matrix <double> X = ms["X"];                 // 51 X 2
            Vector <double> y = ms["y"].Column(0);       // 51 X 1

            // Plot training data
            GnuPlot.HoldOn();
            PlotData(X, y);

            Pause();

            //// ==================== Part 2: Training Linear SVM ====================
            //  The following code will train a linear SVM on the dataset and plot the
            //  decision boundary learned.
            //

            System.Console.WriteLine("\nTraining Linear SVM ...\n");

            // You should try to change the C value below and see how the decision
            // boundary varies (e.g., try C = 1000)
            double C            = 1.0;
            var    linearKernel = KernelHelper.LinearKernel();

            List <List <double> > libSvmData = ConvertToLibSvmFormat(X, y);
            svm_problem           prob       = ProblemHelper.ReadProblem(libSvmData);
            var svc = new C_SVC(prob, linearKernel, C);

            PlotBoundary(X, svc);
            GnuPlot.HoldOff();

            System.Console.WriteLine();

            Pause();

            //// =============== Part 3: Implementing Gaussian Kernel ===============
            //  You will now implement the Gaussian kernel to use
            //  with the SVM. You should complete the code in the GaussianKernel function
            //  (the C# port of gaussianKernel.m).
            //

            System.Console.WriteLine("\nEvaluating the Gaussian Kernel ...\n");

            double sigma = 2.0;
            double sim   = GaussianKernel(
                V.DenseOfArray(new [] { 1.0, 2, 1 }),
                V.DenseOfArray(new [] { 0.0, 4, -1 }),
                sigma
                );

            System.Console.WriteLine("Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {0:f6} :\n\t{1:f6}\n(for sigma = 2, this value should be about 0.324652)\n", sigma, sim);

            Pause();

            //// =============== Part 4: Visualizing Dataset 2 ================
            //  The following code will load the next dataset into your environment and
            //  plot the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data2:
            // You will have X, y in your environment
            ms = MatlabReader.ReadAll <double>("data\\ex6data2.mat");

            X = ms["X"];                 // 863 X 2
            y = ms["y"].Column(0);       // 863 X 1

            // Plot training data
            GnuPlot.HoldOn();
            PlotData(X, y);

            Pause();

            //// ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
            //  After you have implemented the kernel, we can now use it to train the
            //  SVM classifier.
            //

            System.Console.WriteLine("\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n");

            // SVM Parameters
            C     = 1;
            sigma = 0.1;
            double gamma = 1 / (2 * sigma * sigma);

            var rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

            libSvmData = ConvertToLibSvmFormat(X, y);
            prob       = ProblemHelper.ReadProblem(libSvmData);
            svc        = new C_SVC(prob, rbfKernel, C);


            PlotBoundary(X, svc);
            GnuPlot.HoldOff();

            Pause();

            double acc = svc.GetCrossValidationAccuracy(10);

            System.Console.WriteLine("\nCross Validation Accuracy: {0:f6}\n", acc);

            Pause();

            //// =============== Part 6: Visualizing Dataset 3 ================
            //  The following code will load the next dataset into your environment and
            //  plot the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data3:
            // You will have X, y in your environment
            ms = MatlabReader.ReadAll <double>("data\\ex6data3.mat");

            Matrix <double> Xval;
            Vector <double> yval;

            X    = ms["X"];              // 211 X 2
            y    = ms["y"].Column(0);    // 211 X 1
            Xval = ms["Xval"];           // 200 X 2
            yval = ms["yval"].Column(0); // 200 X 1

            // Plot training data
            GnuPlot.HoldOn();
            PlotData(X, y);

            //// ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========

            //  This is a different dataset that you can use to experiment with. Try
            //  different values of C and sigma here.
            //


            (C, sigma) = Dataset3Params(X, y, Xval, yval);

            gamma     = 1 / (2 * sigma * sigma);
            rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

            libSvmData = ConvertToLibSvmFormat(X, y);
            prob       = ProblemHelper.ReadProblem(libSvmData);
            svc        = new C_SVC(prob, rbfKernel, C);

            PlotBoundary(X, svc);

            GnuPlot.HoldOff();
            Pause();
        }
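GaussianKernel itself is not shown in this listing. A minimal sketch consistent with the value quoted in Part 3 (exp(-||x1 - x2||^2 / (2 * sigma^2)) gives about 0.324652 for x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = 2), using the Math.NET vector types this program already relies on:

            private static double GaussianKernel(Vector<double> x1, Vector<double> x2, double sigma)
            {
                // Squared Euclidean distance between the two samples.
                double squaredDistance = (x1 - x2).PointwisePower(2).Sum();
                return Math.Exp(-squaredDistance / (2 * sigma * sigma));
            }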
Example #28
        static void Main(string[] args)
        {
            List <double[]> continuousTrainData = DataWrangler.LoadContinuousDataAsync(TrainingCsv, _indexToIgnore).Result;
            List <double[]> continuousTestData  = DataWrangler.LoadContinuousDataAsync(TestingCsv, _indexToIgnore).Result;

            // Print continuous columns for calculating elbows in an external tool (https://bl.ocks.org/rpgove/0060ff3b656618e9136b)
            foreach (int i in _continuousIndexes)
            {
                using (StreamWriter sw = new StreamWriter($"{i}.txt"))
                {
                    sw.WriteLine(string.Join(",", continuousTrainData.Select(array => array[i])));
                }
            }

            // Convert continuous to discrete
            Dictionary <int, GaussianClusterCollection> indexClusterMapping = DataWrangler.GetIndexClustersMap(continuousTrainData, _indexElbowMap);
            List <int[]> discreteTrainData = DataWrangler.ConvertContinuesToDiscrete(continuousTrainData, indexClusterMapping);
            List <int[]> discreteTestData  = DataWrangler.ConvertContinuesToDiscrete(continuousTestData, indexClusterMapping);

            var problem = ProblemHelper.ReadProblem(discreteTrainData.Select(arr =>
            {
                // Move class to front as it is expected by libsvm.
                int temp = arr[0];
                arr[SVMSupportedClassIndex] = arr[OriginalClassIndex];
                arr[OriginalClassIndex]     = temp;
                return(arr.Select(i => (double)i).ToList());
            }).ToList());

            var test = ProblemHelper.ReadProblem(discreteTestData.Select(arr =>
            {
                // Move class to front as it is expected by libsvm.
                int temp = arr[0];
                arr[SVMSupportedClassIndex] = arr[OriginalClassIndex];
                arr[OriginalClassIndex]     = temp;
                return(arr.Select(i => (double)i).ToList());
            }).ToList());

            // defaults taken from documentation http://weka.sourceforge.net/doc.stable/weka/classifiers/functions/LibSVM.html
            double c      = 1;               // default C is 1
            double gamma  = 1.0 / problem.l; // default gamma is 1/k
            double r      = 0;               // default coef0 is 0
            int    degree = 3;               // default degree is 3

            Dictionary <string, Kernel> nameKernelMap = new Dictionary <string, Kernel>(StringComparer.OrdinalIgnoreCase)
            {
                { "Linear", KernelHelper.LinearKernel() },
                { "Polynomial", KernelHelper.PolynomialKernel(degree, gamma, r) },
                { "Radial", KernelHelper.RadialBasisFunctionKernel(gamma) },
                { "Sigmoid", KernelHelper.SigmoidKernel(gamma, r) },
            };

            // Get accuracies for base comparison
            // DON'T DO PARALLEL. We don't know if the underlying implementation is MT safe or not.
            //Parallel.ForEach(nameKernelMap.Keys, (kernelName) =>
            foreach (string kernelName in nameKernelMap.Keys)
            {
                Console.WriteLine($"{kernelName}: {GetSVMAccuracy(problem, test, nameKernelMap[kernelName], c)}");
            }

            // Get accuracy of with Naive Bayes
            double[]             classWeightPrior      = new[] { 1.0, 1.0 };
            double[]             classPriorProbability = new[] { 0.5, 0.5 };
            NaiveBayesClassifier naiveBayes            = NaiveBayesClassifier.Load(discreteTrainData, SVMSupportedClassIndex, classWeightPrior, classPriorProbability);

            Console.WriteLine($"Naive Bayes: {naiveBayes.GetPredictionAccuracy(discreteTestData, SVMSupportedClassIndex)}");

            // Calculate SVMs Bias and Variance
            List <List <int[]> > samples = Sampler.SampleData(discreteTrainData, BiasVarianceNumOfSamples);

            ConcurrentDictionary <string, ConcurrentDictionary <int, ConcurrentDictionary <int, int> > > kernelInstanceClassifierPredictionsMappings = new ConcurrentDictionary <string, ConcurrentDictionary <int, ConcurrentDictionary <int, int> > >(StringComparer.OrdinalIgnoreCase);

            foreach (string kernelName in nameKernelMap.Keys)
            {
                ConcurrentDictionary <int, ConcurrentDictionary <int, int> > instanceClassifierPredictionMappings = kernelInstanceClassifierPredictionsMappings.GetOrAdd(kernelName, new ConcurrentDictionary <int, ConcurrentDictionary <int, int> >());
                for (int classifierIndex = 0; classifierIndex < BiasVarianceNumOfSamples; classifierIndex++)
                {
                    problem = ProblemHelper.ReadProblem(samples[classifierIndex].Select(arr => arr.Select(i => (double)i).ToList()).ToList());

                    var svm = new C_SVC(problem, nameKernelMap[kernelName], c);

                    for (int instanceIndex = 0; instanceIndex < discreteTestData.Count; instanceIndex++)
                    {
                        ConcurrentDictionary <int, int> classifierPredictionMappings = instanceClassifierPredictionMappings.GetOrAdd(instanceIndex, new ConcurrentDictionary <int, int>());
                        test = ProblemHelper.ReadProblem(new List <List <double> > {
                            discreteTestData[instanceIndex].Select(i => (double)i).ToList()
                        });

                        for (int i = 0; i < test.l; i++)
                        {
                            var x = test.x[i];
                            var y = test.y[i];
                            classifierPredictionMappings.GetOrAdd(classifierIndex, (int)svm.Predict(x));
                        }
                    }
                }
            }

            Console.WriteLine("Kernel, Bias, Variance, Accuracy");
            foreach (string kernelName in nameKernelMap.Keys)
            {
                ConcurrentDictionary <int, ConcurrentDictionary <int, int> > instanceClassifierPredictionMappings = kernelInstanceClassifierPredictionsMappings.GetOrAdd(kernelName, new ConcurrentDictionary <int, ConcurrentDictionary <int, int> >());
                Tuple <double, double, double> biasVarianceAccuracy = BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, SVMSupportedClassIndex, instanceClassifierPredictionMappings);
                Console.WriteLine($"{kernelName}, {biasVarianceAccuracy.Item1}, {biasVarianceAccuracy.Item2}, {biasVarianceAccuracy.Item3}");
            }

            Console.WriteLine("Press ENTER to continue...");
            Console.ReadLine();
        }