Beispiel #1
0
 /// <summary>
 /// Builds a scaled copy of a problem by passing every feature value through the given range transform.
 /// New arrays and new nodes are allocated, so the problem passed in is left untouched.
 /// </summary>
 /// <param name="range">The Range transform applied to each feature value</param>
 /// <param name="prob">The problem whose feature values are to be scaled</param>
 /// <returns>A new problem with the same labels and node indices, and transformed values</returns>
 public static svm_problem Scale(this RangeTransform range, svm_problem prob) {
   int count = prob.l;
   double[] labels = new double[count];
   svm_node[][] scaledRows = new svm_node[count][];

   for (int row = 0; row < count; row++) {
     svm_node[] source = prob.x[row];
     svm_node[] target = new svm_node[source.Length];

     for (int col = 0; col < target.Length; col++) {
       svm_node node = source[col];
       // The node index is handed to the transform together with the value.
       target[col] = new svm_node() { index = node.index, value = range.Transform(node.value, node.index) };
     }

     scaledRows[row] = target;
     // Labels are copied through unchanged.
     labels[row] = prob.y[row];
   }

   return new svm_problem() { l = count, y = labels, x = scaledRows };
 }
Beispiel #2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: protected void trainInternal(java.util.LinkedHashMap<String, String> libOptions) throws org.maltparser.core.exception.MaltChainedException
        /// <summary>
        /// Trains a LIBSVM model from the ".ins" instance file and serializes it to the ".moo" file.
        /// Standard output and standard error are silenced while <c>svm.svm_train</c> runs and are
        /// restored afterwards, even if training fails.
        /// </summary>
        /// <param name="libOptions">Learner options passed to <c>readProblem</c> and <c>getLibSvmParameters</c>.</param>
        /// <exception cref="LibException">
        /// Thrown when the parameter check reports a problem, or when an out-of-memory, argument,
        /// security or I/O error occurs during training or saving.
        /// </exception>
        protected internal override void trainInternal(LinkedHashMap <string, string> libOptions)
        {
            try
            {
                svm_problem prob = readProblem(getInstanceInputStreamReader(".ins"), libOptions);
                svm_parameter param = getLibSvmParameters(libOptions);

                // Run the parameter check once and reuse its message instead of calling it twice.
                var parameterError = svm.svm_check_parameter(prob, param);
                if (parameterError != null)
                {
                    throw new LibException(parameterError);
                }

                Configuration config = Configuration;

                if (config.LoggerInfoEnabled)
                {
                    config.logInfoMessage("Creating LIBSVM model " + getFile(".moo").Name + "\n");
                }

                // Remember the current streams so they can be put back after training.
                PrintStream @out = System.out;
                PrintStream err = System.err;
                svm_model model;
                try
                {
                    System.Out = NoPrintStream.NO_PRINTSTREAM;
                    System.Err = NoPrintStream.NO_PRINTSTREAM;
                    model = svm.svm_train(prob, param);
                }
                finally
                {
                    // Fix: the saved error stream used to be assigned to System.Out, which left
                    // standard error silenced. The restore also now runs when training throws.
                    System.Err = err;
                    System.Out = @out;
                }

                ObjectOutputStream output = new ObjectOutputStream(new BufferedOutputStream(new FileStream(getFile(".moo").AbsolutePath, FileMode.Create, FileAccess.Write)));
                try
                {
                    output.writeObject(new MaltLibsvmModel(model, prob));
                }
                finally
                {
                    output.close();
                }

                bool saveInstanceFiles = ((bool?)config.getOptionValue("lib", "save_instance_files")).Value;
                if (!saveInstanceFiles)
                {
                    getFile(".ins").delete();
                }
            }
            catch (OutOfMemoryException e)
            {
                throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
            }
            catch (ArgumentException e)
            {
                throw new LibException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
            }
            catch (SecurityException e)
            {
                throw new LibException("The LIBSVM learner cannot remove the instance file. ", e);
            }
            catch (IOException e)
            {
                // Fix: report the ".moo" file that is actually written above, not ".mod".
                throw new LibException("The LIBSVM learner cannot save the model file '" + getFile(".moo").AbsolutePath + "'. ", e);
            }
        }
        /// <summary>
        /// Console driver for the sentiment-analysis sample: loads and preprocesses the comment file,
        /// builds a vocabulary and feature matrix, holds out the first 20% of rows as a test set,
        /// trains a linear C-SVC on the rest, and prints accuracy and F1 figures.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Skip the clear when output is redirected.
            bool outputRedirected = System.Console.IsOutputRedirected;
            if (!outputRedirected)
            {
                System.Console.Clear();
            }

            CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

            System.Console.WriteLine("Sentiment Analysis");
            System.Console.WriteLine("======================\n");

            // Step 1: read the raw data file.
            System.Console.WriteLine("Loading data....");
            string rawText = ReadDataFile(".\\data\\wikipedia-detox-250-line-data.tsv");

            // Step 2: preprocess the comments.
            System.Console.WriteLine("Processing data....");
            string[,] comments = ProcessComments(rawText);
            System.Console.WriteLine($"Data file contains {comments.GetLength(0)} comments\n");

            // Step 3: build the vocabulary from the processed comments.
            System.Console.WriteLine("Generating Vocabulary List....");
            string[] vocabulary = GenerateVocabulary(comments);
            System.Console.WriteLine($"Vocabulary generated with {vocabulary.Length} words\n");

            // Step 4: labels.
            System.Console.WriteLine("Retrieving labels...");
            Vector <double> labels = GetLables(comments);

            // Step 5: features derived from the comments and the vocabulary.
            System.Console.WriteLine("Extracting features...");
            Matrix <double> features = GetFeatures(comments, vocabulary);

            // Step 6: the first 20% of the rows form the test set, the remainder the training set.
            System.Console.WriteLine("Splitting data...");
            int rowCount     = features.RowCount;
            int columnCount  = features.ColumnCount;
            int holdoutCount = rowCount * 20 / 100;
            int trainCount   = rowCount - holdoutCount;

            Vector <double> testLabel    = labels.SubVector(0, holdoutCount);
            Matrix <double> testFeatures = features.SubMatrix(0, holdoutCount, 0, columnCount);

            Vector <double> trainingLabel    = labels.SubVector(holdoutCount, trainCount);
            Matrix <double> trainingFeatures = features.SubMatrix(holdoutCount, trainCount, 0, columnCount);

            System.Console.WriteLine();
            System.Console.WriteLine($"Test set: {testLabel.Count}");
            System.Console.WriteLine($"Training set: {trainingLabel.Count}");

            // Step 7: train the linear SVM.
            System.Console.WriteLine("\nTraining linear SVM ...\n");

            double cost   = 0.4;
            var    kernel = KernelHelper.LinearKernel();

            List <List <double> > libSvmRows = ConvertToLibSvmFormat(trainingFeatures, trainingLabel);
            svm_problem           problem    = ProblemHelper.ReadProblem(libSvmRows);
            var classifier = new C_SVC(problem, kernel, cost);

            System.Console.WriteLine();

            // Step 8: accuracy on the training set.
            Vector <double> trainPrediction = SvmPredic(trainingFeatures, classifier);
            double          trainAccuracy   = CalculateAccuracy(trainPrediction, trainingLabel);

            System.Console.WriteLine("Training set Accuracy: {0:f2}%\n", trainAccuracy);

            // Step 9: accuracy on the held-out test set.
            Vector <double> testPrediction = SvmPredic(testFeatures, classifier);
            double          testAccuracy   = CalculateAccuracy(testPrediction, testLabel);
            System.Console.WriteLine("Test set Accuracy: {0:f2}%\n", testAccuracy);

            // Step 10: F1 score on the test set, printed scaled by 100.
            double f1Score = CalculateF1Score(testPrediction, testLabel);

            System.Console.WriteLine("F1 Score on test set: {0:f2}%\n", f1Score * 100);
        }
Beispiel #4
0
 /// <summary>
 ///     Classification SVM
 ///     Supports multi-class classification
 ///     This constructor does no work of its own: every argument is forwarded to the base
 ///     constructor together with <c>SvmType.C_SVC</c>.
 /// </summary>
 /// <param name="prob">Training Data Set</param>
 /// <param name="kernel">Selected Kernel</param>
 /// <param name="C">Cost parameter </param>
 /// <param name="cache_size">Indicates the maximum memory that the program can use (defaults to 100; unit not visible here, presumably megabytes as in LIBSVM; TODO confirm)</param>
 /// <param name="probability">Set this parameter to true if you want to use the PredictProbabilities function; passed to the base constructor as 1 (true) or 0 (false)</param>
 public C_SVC(svm_problem prob, Kernel kernel, double C, double cache_size = 100, bool probability = false)
     : base(SvmType.C_SVC, prob, kernel, C, cache_size, probability ? 1 : 0)
 {
 }
Beispiel #5
0
        /// <summary>
        /// Repeatedly compares the perceptron learning algorithm (PLA) with a linear C-SVC.
        /// Each experiment draws a random target line, generates <paramref name="points"/> training
        /// points in [-1, 1) x [-1, 1) labelled by that line, trains both learners, and estimates
        /// each learner's agreement with the target on freshly drawn test points.
        /// </summary>
        /// <param name="experiments">Number of independent experiments to run.</param>
        /// <param name="points">Number of training points generated per experiment.</param>
        /// <returns>
        /// Item1: fraction of experiments in which the SVM's agreement with the target was at least
        /// the perceptron's. Item2: average of <c>model.l</c> over the experiments (accumulated in
        /// <c>svCount</c>; presumably the support-vector count, confirm against the LIBSVM port in use).
        /// </returns>
        static Tuple <double, double> RunPLAvsSVM(int experiments, int points)
        {
            const int TEST_POINTS = 10000;
            Random    rnd         = new Random();

            long svmWins = 0, svCount = 0;

            for (int i = 1; i <= experiments; i++)
            {
                // Target function: weights derived from two random points (x1, y1) and (x2, y2).
                double x1 = rnd.NextDouble(), y1 = rnd.NextDouble(), x2 = rnd.NextDouble(), y2 = rnd.NextDouble();
                var    Wf = new DenseVector(3);
                Wf[0] = 1;
                Wf[1] = (y1 - y2) / (x1 * y2 - y1 * x2);
                Wf[2] = (x2 - x1) / (x1 * y2 - y1 * x2);
                Func <MathNet.Numerics.LinearAlgebra.Generic.Vector <double>, int> f = x => Wf.DotProduct(x) >= 0 ? 1 : -1;

                // Generate training points; redraw the whole set until both classes are present.
                var   X = new DenseMatrix(points, 3);
                int[] labels;
                do
                {
                    for (int j = 0; j < points; j++)
                    {
                        X[j, 0] = 1;
                        X[j, 1] = rnd.NextDouble() * 2 - 1;
                        X[j, 2] = rnd.NextDouble() * 2 - 1;
                    }

                    // The target label of each training point is fixed once the points are drawn,
                    // so compute it once here instead of re-evaluating f on every later scan.
                    labels = Enumerable.Range(0, points).Select(j => f(X.Row(j))).ToArray();
                }while (labels.All(label => label == labels[0]));

                var W = new DenseVector(3);
                Func <MathNet.Numerics.LinearAlgebra.Generic.Vector <double>, int> h = x => W.DotProduct(x) >= 0 ? 1 : -1;

                // Perceptron: pick a random misclassified point and update until none remain.
                while (true)
                {
                    // One scan per iteration serves as both the stop test and the candidate list.
                    int[] M = Enumerable.Range(0, points).Where(j => h(X.Row(j)) != labels[j]).ToArray();
                    if (M.Length == 0)
                    {
                        break;
                    }

                    int m = M[rnd.Next(0, M.Length)];

                    int sign = labels[m];
                    W[0] += sign;
                    W[1] += sign * X[m, 1];
                    W[2] += sign * X[m, 2];
                }

                // Estimate P[f(Xtest) == h(Xtest)] for the perceptron on fresh random points.
                DenseVector Xtest = new DenseVector(3);
                Xtest[0] = 1;
                int matches = 0;
                for (int j = 0; j < TEST_POINTS; j++)
                {
                    Xtest[1] = rnd.NextDouble() * 2 - 1;
                    Xtest[2] = rnd.NextDouble() * 2 - 1;
                    if (f(Xtest) == h(Xtest))
                    {
                        matches++;
                    }
                }
                double Ppla = (matches + 0.0) / TEST_POINTS;

                // SVM problem built from the same training set, without the constant first column.
                var prob = new svm_problem()
                {
                    x = Enumerable.Range(0, points).Select(j =>
                                                           new svm_node[] {
                        new svm_node()
                        {
                            index = 0, value = X[j, 1]
                        },
                        new svm_node()
                        {
                            index = 1, value = X[j, 2]
                        }
                    }).ToArray(),
                    y = labels.Select(label => (double)label).ToArray(),
                    l = points
                };

                var model = svm.svm_train(prob, new svm_parameter()
                {
                    svm_type    = (int)SvmType.C_SVC,
                    kernel_type = (int)KernelType.LINEAR,
                    C           = 1000000,
                    eps         = 0.001,
                    shrinking   = 0
                });

                // Estimate P[f(Xtest) == h_svm(Xtest)]; the two nodes are reused for every test point.
                svm_node[] Xsvm = new svm_node[] {
                    new svm_node()
                    {
                        index = 0, value = 1.0
                    },
                    new svm_node()
                    {
                        index = 1, value = 1.0
                    }
                };
                matches = 0;

                for (int j = 0; j < TEST_POINTS; j++)
                {
                    Xtest[1]      = rnd.NextDouble() * 2 - 1;
                    Xsvm[0].value = Xtest[1];
                    Xtest[2]      = rnd.NextDouble() * 2 - 1;
                    Xsvm[1].value = Xtest[2];
                    if (f(Xtest) == (svm.svm_predict(model, Xsvm) > 0 ? 1 : -1))
                    {
                        matches++;
                    }
                }
                double Psvm = (matches + 0.0) / TEST_POINTS;

                svCount += model.l;
                if (Psvm >= Ppla)
                {
                    svmWins++;
                }
            }

            return(Tuple.Create((svmWins + 0.0) / experiments, (svCount + 0.0) / experiments));
        }
Beispiel #6
0
 /// <summary>
 /// Computes the Range transform for a problem using DEFAULT_LOWER_BOUND and DEFAULT_UPPER_BOUND.
 /// </summary>
 /// <param name="prob">The Problem to analyze</param>
 /// <returns>The Range transform produced by the three-argument overload with the default bounds</returns>
 public static RangeTransform Compute(svm_problem prob)
     => Compute(prob, DEFAULT_LOWER_BOUND, DEFAULT_UPPER_BOUND);
Beispiel #7
0
        /// <summary>
        /// Interactive walkthrough of the SVM exercise: loads three datasets from .mat files, plots
        /// them, trains a linear C-SVC and two RBF-kernel C-SVCs, plots each decision boundary, and
        /// prints a Gaussian-kernel sanity check and a cross-validation accuracy. Execution stops at
        /// each Pause() call between parts.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Skip clearing the screen when output is redirected.
            if (!System.Console.IsOutputRedirected)
            {
                System.Console.Clear();
            }

            CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

            // Builders for matrices and vectors; only V is used below (M is unused in this method).
            var M = Matrix <double> .Build;
            var V = Vector <double> .Build;


            //// =============== Part 1: Loading and Visualizing Data ================
            //  We start the exercise by first loading and visualizing the dataset.
            //  The following code will load the dataset into your environment and plot
            //  the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data1:
            // You will have X, y in your environment
            Dictionary <string, Matrix <double> > ms = MatlabReader.ReadAll <double>("data\\ex6data1.mat");

            Matrix <double> X = ms["X"];                 // 51 X 2
            Vector <double> y = ms["y"].Column(0);       // 51 X 1

            // Plot training data; hold stays on until HoldOff() is called after PlotBoundary below
            GnuPlot.HoldOn();
            PlotData(X, y);

            Pause();

            //// ==================== Part 2: Training Linear SVM ====================
            //  The following code will train a linear SVM on the dataset and plot the
            //  decision boundary learned.
            //

            System.Console.WriteLine("\nTraining Linear SVM ...\n");

            // You should try to change the C value below and see how the decision
            // boundary varies (e.g., try C = 1000)
            double C            = 1.0;
            var    linearKernel = KernelHelper.LinearKernel();

            // Convert X and y into the list form that ProblemHelper.ReadProblem accepts, then train.
            List <List <double> > libSvmData = ConvertToLibSvmFormat(X, y);
            svm_problem           prob       = ProblemHelper.ReadProblem(libSvmData);
            var svc = new C_SVC(prob, linearKernel, C);

            PlotBoundary(X, svc);
            GnuPlot.HoldOff();

            System.Console.WriteLine();

            Pause();

            //// =============== Part 3: Implementing Gaussian Kernel ===============
            //  This part evaluates the Gaussian kernel on two fixed vectors and prints
            //  the result next to the expected value. The kernel itself is computed by
            //  GaussianKernel (the original comment referred to gaussianKernel.m).
            //

            System.Console.WriteLine("\nEvaluating the Gaussian Kernel ...\n");

            double sigma = 2.0;
            double sim   = GaussianKernel(
                V.DenseOfArray(new [] { 1.0, 2, 1 }),
                V.DenseOfArray(new [] { 0.0, 4, -1 }),
                sigma
                );

            System.Console.WriteLine("Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {0:f6} :\n\t{1:f6}\n(for sigma = 2, this value should be about 0.324652)\n", sigma, sim);

            Pause();

            //// =============== Part 4: Visualizing Dataset 2 ================
            //  The following code will load the next dataset into your environment and
            //  plot the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data2:
            // You will have X, y in your environment
            ms = MatlabReader.ReadAll <double>("data\\ex6data2.mat");

            X = ms["X"];                 // 863 X 2
            y = ms["y"].Column(0);       // 863 X 1

            // Plot training data
            GnuPlot.HoldOn();
            PlotData(X, y);

            Pause();

            //// ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
            //  After you have implemented the kernel, we can now use it to train the
            //  SVM classifier.
            //

            System.Console.WriteLine("\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n");

            // SVM Parameters
            C     = 1;
            sigma = 0.1;
            // The RBF kernel helper takes gamma, derived here from sigma as 1 / (2 * sigma^2).
            double gamma = 1 / (2 * sigma * sigma);

            var rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

            libSvmData = ConvertToLibSvmFormat(X, y);
            prob       = ProblemHelper.ReadProblem(libSvmData);
            svc        = new C_SVC(prob, rbfKernel, C);


            PlotBoundary(X, svc);
            GnuPlot.HoldOff();

            Pause();

            // Cross-validation accuracy of the dataset-2 classifier; the argument 10 is presumably
            // the number of folds (confirm against GetCrossValidationAccuracy).
            double acc = svc.GetCrossValidationAccuracy(10);

            System.Console.WriteLine("\nCross Validation Accuracy: {0:f6}\n", acc);

            Pause();

            //// =============== Part 6: Visualizing Dataset 3 ================
            //  The following code will load the next dataset into your environment and
            //  plot the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data3:
            // You will have X, y, Xval, yval in your environment
            ms = MatlabReader.ReadAll <double>("data\\ex6data3.mat");

            Matrix <double> Xval;
            Vector <double> yval;

            X    = ms["X"];              // 211 X 2
            y    = ms["y"].Column(0);    // 211 X 1
            Xval = ms["Xval"];           // 200 X 2
            yval = ms["yval"].Column(0); // 200 X 1

            // Plot training data (no Pause() here; execution continues straight into Part 7)
            GnuPlot.HoldOn();
            PlotData(X, y);

            //// ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========

            //  This is a different dataset that you can use to experiment with. Try
            //  different values of C and sigma here.
            //


            // C and sigma are chosen by Dataset3Params from the training and validation sets.
            (C, sigma) = Dataset3Params(X, y, Xval, yval);

            gamma     = 1 / (2 * sigma * sigma);
            rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

            libSvmData = ConvertToLibSvmFormat(X, y);
            prob       = ProblemHelper.ReadProblem(libSvmData);
            svc        = new C_SVC(prob, rbfKernel, C);

            PlotBoundary(X, svc);

            GnuPlot.HoldOff();
            Pause();
        }
        /// <summary>
        /// Builds an <c>svm_problem</c> from per-document word values and, as a side effect, writes
        /// the same data to <c>SVM_TRAIN_FILE_NAME</c>, one "label index:value ..." line per document.
        /// Documents with no word present in <paramref name="dictionary"/> are skipped in both the
        /// problem and the file.
        /// </summary>
        /// <param name="docWordDicList">One word-to-value map per document.</param>
        /// <param name="dictionary">Maps a word to its feature index; words not in it are ignored.</param>
        /// <param name="trainingAnswer">Label of each document, parallel to <paramref name="docWordDicList"/>.</param>
        /// <returns>The problem holding the labels and index-sorted nodes of every kept document.</returns>
        private svm_problem gen_svm_training_data(List <Dictionary <string, double> > docWordDicList, Dictionary <string, int> dictionary, List <int> trainingAnswer)
        {
            var vy = new List <double>();     // label list
            var vx = new List <svm_node[]>(); // node list

            // The file is machine-readable, so numbers are written with the invariant culture;
            // the current culture may use ',' as the decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // 'using' closes the file even when an exception escapes the loop.
            using (StreamWriter file = new StreamWriter(SVM_TRAIN_FILE_NAME))
            {
                for (int i = 0; i < docWordDicList.Count; i++)
                {
                    // Read the label first so a missing entry fails here, as it did before.
                    int answer = trainingAnswer[i];

                    var nodeList = new List <KeyValuePair <int, double> >();

                    // Enumerate the pairs directly: one dictionary lookup per word instead of three.
                    foreach (KeyValuePair <string, double> entry in docWordDicList[i])
                    {
                        int theIndex;
                        if (dictionary.TryGetValue(entry.Key, out theIndex))
                        {
                            nodeList.Add(new KeyValuePair <int, double>(theIndex, entry.Value));
                        }
                    }

                    if (nodeList.Count == 0)
                    {
                        continue;
                    }

                    // Nodes are emitted in ascending feature-index order.
                    nodeList.Sort((firstPair, nextPair) => firstPair.Key.CompareTo(nextPair.Key));

                    double labelValue = (double)answer;
                    var    x          = new List <svm_node>(nodeList.Count);
                    var    trainStr   = new System.Text.StringBuilder(answer.ToString(invariant));

                    foreach (KeyValuePair <int, double> node in nodeList)
                    {
                        x.Add(new svm_node() // svm node
                        {
                            index = node.Key,
                            value = node.Value,
                        });

                        Console.WriteLine(@"## train data - label:{2}, index:{0}, value:{1}", node.Key, node.Value, labelValue);

                        // Append the node to the line written to the training file.
                        trainStr.Append(' ')
                                .Append(node.Key.ToString(invariant))
                                .Append(':')
                                .Append(node.Value.ToString(invariant));
                    }

                    file.WriteLine(trainStr.ToString());

                    vy.Add(labelValue); // label
                    vx.Add(x.ToArray());
                }
            }

            var prob = new svm_problem();
            prob.l = vy.Count;
            prob.x = vx.ToArray();
            prob.y = vy.ToArray();

            return(prob);
        }
Beispiel #9
0
        /// <summary>
        /// Converts the training matrix into an <c>svm_problem</c>, sets the kernel type and gamma
        /// on <c>Param</c>, and stores the result of <c>svm.svm_train</c> in <c>Model</c>.
        /// When the kernel has a kernel function, <paramref name="x"/> is first kept in <c>xFit</c>
        /// and replaced by the output of <c>ComputeKernel</c>.
        /// </summary>
        /// <param name="x">Training samples, one per row.</param>
        /// <param name="y">Target values, copied into the problem as an array.</param>
        /// <exception cref="ArgumentException">The computed kernel matrix is not square.</exception>
        private void FitInternal(Matrix <double> x, Vector <double> y)
        {
            bool hasKernelFunction = this.Kernel.KernelFunction != null;
            if (hasKernelFunction)
            {
                // Keep the original samples: they are needed to compute the kernel in predict.
                // TODO: add keyword copy to copy on demand
                this.xFit = x;
                x         = this.ComputeKernel(x);

                if (x.RowCount != x.ColumnCount)
                {
                    throw new ArgumentException("X.RowCount should be equal to X.ColumnCount");
                }
            }

            var problem = new svm_problem();
            problem.l = x.RowCount;
            problem.x = new svm_node[x.RowCount][];

            foreach (var row in x.RowEnumerator())
            {
                int  rowIndex    = row.Item1;
                bool precomputed = Kernel.LibSvmKernel == LibSvmKernel.Precomputed;

                // For a precomputed kernel the column indices are shifted by one, because index 0
                // is taken by the leading node added below.
                int shift    = precomputed ? 1 : 0;
                var features = row.Item2.GetIndexedEnumerator().Select(
                    cell => new svm_node {
                    index = cell.Item1 + shift, value = cell.Item2
                });

                if (precomputed)
                {
                    // Leading node: index 0, value = one-based row number.
                    var header = new svm_node {
                        index = 0, value = rowIndex + 1
                    };
                    problem.x[rowIndex] = new[] { header }.Concat(features).ToArray();
                }
                else
                {
                    problem.x[rowIndex] = features.ToArray();
                }
            }

            problem.y = y.ToArray();

            var kernelType = this.Kernel.LibSvmKernel;
            this.Param.kernel_type = (int)kernelType;

            // A Gamma of 0 means "not provided": Poly and Rbf kernels then use 1 / column count.
            bool gammaDefaultApplies =
                (kernelType == LibSvmKernel.Poly || kernelType == LibSvmKernel.Rbf) &&
                this.Gamma == 0;
            this.Param.gamma = gammaDefaultApplies ? 1.0 / x.ColumnCount : this.Gamma;

            this.Model = svm.svm_train(problem, this.Param);
        }