/// <summary>
/// Builds a scaled copy of a problem by passing every feature value through the
/// given range transform. The input problem is only read, never modified.
/// </summary>
/// <param name="range">The Range transform to use in scaling</param>
/// <param name="prob">The problem to scale</param>
/// <returns>A new problem holding the scaled feature values and a copy of the labels</returns>
public static svm_problem Scale(this RangeTransform range, svm_problem prob)
{
    var result = new svm_problem();
    result.l = prob.l;
    result.y = new double[prob.l];
    result.x = new svm_node[prob.l][];

    for (int row = 0; row < result.l; row++)
    {
        svm_node[] sourceRow = prob.x[row];
        svm_node[] scaledRow = new svm_node[sourceRow.Length];
        result.x[row] = scaledRow;

        for (int col = 0; col < scaledRow.Length; col++)
        {
            svm_node sourceNode = sourceRow[col];

            // The feature index is kept as-is; only the value is transformed.
            var scaledNode = new svm_node();
            scaledNode.index = sourceNode.index;
            scaledNode.value = range.Transform(sourceNode.value, sourceNode.index);
            scaledRow[col] = scaledNode;
        }

        result.y[row] = prob.y[row];
    }

    return result;
}
/// <summary>
/// Trains a LIBSVM model from the ".ins" instance file and serializes it, wrapped in a
/// <c>MaltLibsvmModel</c>, to the ".moo" file. The instance file is deleted afterwards
/// unless the "lib" / "save_instance_files" option is set.
/// </summary>
/// <param name="libOptions">Options forwarded to <c>readProblem</c> and <c>getLibSvmParameters</c>.</param>
/// <exception cref="LibException">
/// Thrown when the LIBSVM parameter check reports a problem, or when an out-of-memory,
/// argument, security or I/O error occurs during training or saving.
/// </exception>
// Converted from Java; the original signature was:
//   protected void trainInternal(java.util.LinkedHashMap<String, String> libOptions)
//       throws org.maltparser.core.exception.MaltChainedException
protected internal override void trainInternal(LinkedHashMap <string, string> libOptions)
{
    try
    {
        svm_problem prob = readProblem(getInstanceInputStreamReader(".ins"), libOptions);
        svm_parameter param = getLibSvmParameters(libOptions);

        // Run the parameter check once and reuse its message.
        var parameterError = svm.svm_check_parameter(prob, param);
        if (parameterError != null)
        {
            throw new LibException(parameterError);
        }

        Configuration config = Configuration;
        if (config.LoggerInfoEnabled)
        {
            config.logInfoMessage("Creating LIBSVM model " + getFile(".moo").Name + "\n");
        }

        // Silence LIBSVM's console output while training.
        PrintStream @out = System.out;
        PrintStream err = System.err;
        svm_model model;
        try
        {
            System.Out = NoPrintStream.NO_PRINTSTREAM;
            System.Err = NoPrintStream.NO_PRINTSTREAM;
            model = svm.svm_train(prob, param);
        }
        finally
        {
            // Restore BOTH streams, even when training throws.
            System.Err = err;
            System.Out = @out;
        }

        ObjectOutputStream output = new ObjectOutputStream(new BufferedOutputStream(new FileStream(getFile(".moo").AbsolutePath, FileMode.Create, FileAccess.Write)));
        try
        {
            output.writeObject(new MaltLibsvmModel(model, prob));
        }
        finally
        {
            output.close();
        }

        bool saveInstanceFiles = ((bool?)Configuration.getOptionValue("lib", "save_instance_files")).Value;
        if (!saveInstanceFiles)
        {
            getFile(".ins").delete();
        }
    }
    catch (OutOfMemoryException e)
    {
        throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
    }
    catch (ArgumentException e)
    {
        throw new LibException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
    }
    catch (SecurityException e)
    {
        throw new LibException("The LIBSVM learner cannot remove the instance file. ", e);
    }
    catch (IOException e)
    {
        // The model is written to the ".moo" file, so report that path.
        throw new LibException("The LIBSVM learner cannot save the model file '" + getFile(".moo").AbsolutePath + "'. ", e);
    }
}
/// <summary>
/// Sentiment-analysis demo: loads the comment data file, builds a vocabulary and a
/// feature matrix, splits the rows 20:80 into test and training sets, trains a linear
/// C-SVC and prints accuracy on both sets plus the F1 score on the test set.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    if (!System.Console.IsOutputRedirected)
    {
        System.Console.Clear();
    }

    CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

    System.Console.WriteLine("Sentiment Analysis");
    System.Console.WriteLine("======================\n");

    // load data (path built with Path.Combine so it also resolves on non-Windows systems)
    System.Console.WriteLine("Loading data....");
    string dataFilePath = System.IO.Path.Combine("data", "wikipedia-detox-250-line-data.tsv");
    string fileContent = ReadDataFile(dataFilePath);

    // preprocess file
    System.Console.WriteLine("Processing data....");
    string[,] processedComments = ProcessComments(fileContent);
    System.Console.WriteLine($"Data file contains {processedComments.GetLength(0)} comments\n");

    // generate the vocabulary list
    System.Console.WriteLine("Generating Vocabulary List....");
    string[] vocab = GenerateVocabulary(processedComments);
    System.Console.WriteLine($"Vocabulary generated with {vocab.Length} words\n");

    // get labels from preprocessed comments
    System.Console.WriteLine("Retrieving labels...");
    Vector <double> Y = GetLables(processedComments);

    // extract features from processed comments and vocabulary
    System.Console.WriteLine("Extracting features...");
    Matrix <double> X = GetFeatures(processedComments, vocab);

    // split the data: the first 20% of the rows become the test set,
    // the remaining 80% the training set
    System.Console.WriteLine("Splitting data...");
    int m = X.RowCount;
    int n = X.ColumnCount;
    int testsetSize = m * 20 / 100;

    Vector <double> testLabel = Y.SubVector(0, testsetSize);
    Matrix <double> testFeatures = X.SubMatrix(0, testsetSize, 0, n);
    Vector <double> trainingLabel = Y.SubVector(testsetSize, m - testsetSize);
    Matrix <double> trainingFeatures = X.SubMatrix(testsetSize, m - testsetSize, 0, n);

    System.Console.WriteLine();
    System.Console.WriteLine($"Test set: {testLabel.Count}");
    System.Console.WriteLine($"Training set: {trainingLabel.Count}");

    // training SVM
    System.Console.WriteLine("\nTraining linear SVM ...\n");

    // SVM parameters
    double C = .4;
    var linearKernel = KernelHelper.LinearKernel();

    List <List <double> > libSvmData = ConvertToLibSvmFormat(trainingFeatures, trainingLabel);
    svm_problem prob = ProblemHelper.ReadProblem(libSvmData);
    var svc = new C_SVC(prob, linearKernel, C);
    System.Console.WriteLine();

    // accuracy on training set
    // NOTE(review): the output appends '%', so CalculateAccuracy presumably returns a percentage - confirm.
    Vector <double> prediction = SvmPredic(trainingFeatures, svc);
    double accuracy = CalculateAccuracy(prediction, trainingLabel);
    System.Console.WriteLine("Training set Accuracy: {0:f2}%\n", accuracy);

    // accuracy on test set
    prediction = SvmPredic(testFeatures, svc);
    accuracy = CalculateAccuracy(prediction, testLabel);
    System.Console.WriteLine("Test set Accuracy: {0:f2}%\n", accuracy);

    // F1 score (scaled by 100 for display)
    double f1Score = CalculateF1Score(prediction, testLabel);
    System.Console.WriteLine("F1 Score on test set: {0:f2}%\n", f1Score * 100);
}
/// <summary>
/// Creates a C-SVC (classification) support vector machine.
/// Supports multi-class classification.
/// </summary>
/// <param name="prob">Training data set</param>
/// <param name="kernel">Kernel to use</param>
/// <param name="C">Cost parameter</param>
/// <param name="cache_size">Maximum memory the program may use (defaults to 100)</param>
/// <param name="probability">
/// Pass true if you intend to call PredictProbabilities; forwarded to the base
/// constructor as 1 (true) or 0 (false)
/// </param>
public C_SVC(
    svm_problem prob,
    Kernel kernel,
    double C,
    double cache_size = 100,
    bool probability = false)
    : base(SvmType.C_SVC, prob, kernel, C, cache_size, probability ? 1 : 0)
{
    // All initialisation happens in the base constructor.
}
/// <summary>
/// Repeatedly compares the perceptron learning algorithm (PLA) with a linear
/// hard-margin-style SVM (C = 1000000) on randomly generated, linearly separable 2-D data.
/// </summary>
/// <param name="experiments">Number of independent experiments to run.</param>
/// <param name="points">Number of training points generated per experiment.</param>
/// <returns>
/// Item1: fraction of experiments in which the SVM agreed with the target function on
/// random test points at least as often as the PLA did.
/// Item2: average of <c>model.l</c> per experiment (presumably LIBSVM's support-vector count).
/// </returns>
static Tuple <double, double> RunPLAvsSVM(int experiments, int points)
{
    const int TestPoints = 10000;
    Random rnd = new Random();
    long svmWins = 0, svCount = 0;

    for (int i = 1; i <= experiments; i++)
    {
        // Pick a random target line through two random points; Wf = (1, w1, w2)
        // satisfies 1 + w1 * x + w2 * y = 0 at both points.
        double x1 = rnd.NextDouble(), y1 = rnd.NextDouble(), x2 = rnd.NextDouble(), y2 = rnd.NextDouble();
        var Wf = new DenseVector(3);
        Wf[0] = 1;
        Wf[1] = (y1 - y2) / (x1 * y2 - y1 * x2);
        Wf[2] = (x2 - x1) / (x1 * y2 - y1 * x2);
        Func <MathNet.Numerics.LinearAlgebra.Generic.Vector <double>, int> f = x => Wf.DotProduct(x) >= 0 ? 1 : -1;

        // Generate a training set of N random points in [-1, 1) x [-1, 1);
        // regenerate while every point falls on the same side of the target line.
        var X = new DenseMatrix(points, 3);
        do
        {
            for (int j = 0; j < points; j++)
            {
                X[j, 0] = 1;
                X[j, 1] = rnd.NextDouble() * 2 - 1;
                X[j, 2] = rnd.NextDouble() * 2 - 1;
            }
        }
        while (Enumerable.Range(0, X.RowCount).All(j => f(X.Row(0)) == f(X.Row(j))));

        // The hypothesis h reads W through the closure, so it tracks the updates below.
        var W = new DenseVector(3);
        Func <MathNet.Numerics.LinearAlgebra.Generic.Vector <double>, int> h = x => W.DotProduct(x) >= 0 ? 1 : -1;

        // Run the perceptron: pick a random misclassified point and move W towards it
        // until nothing is misclassified. The set is computed once per iteration.
        Func <int[]> findMisclassified =
            () => Enumerable.Range(0, points).Where(j => h(X.Row(j)) != f(X.Row(j))).ToArray();

        int[] misclassified = findMisclassified();
        while (misclassified.Length > 0)
        {
            int m = misclassified[rnd.Next(0, misclassified.Length)];
            int sign = f(X.Row(m));

            W[0] += sign;
            W[1] += sign * X[m, 1];
            W[2] += sign * X[m, 2];

            misclassified = findMisclassified();
        }

        // Estimate P[f(Xtest) == h(Xtest)] for the perceptron on fresh random points.
        DenseVector Xtest = new DenseVector(3);
        Xtest[0] = 1;
        int matches = 0;
        for (int j = 0; j < TestPoints; j++)
        {
            Xtest[1] = rnd.NextDouble() * 2 - 1;
            Xtest[2] = rnd.NextDouble() * 2 - 1;
            if (f(Xtest) == h(Xtest))
            {
                matches++;
            }
        }
        double Ppla = (double)matches / TestPoints;

        // Run the SVM on the same training points (without the constant bias column).
        var prob = new svm_problem()
        {
            x = Enumerable.Range(0, points).Select(j => new svm_node[]
            {
                new svm_node() { index = 0, value = X[j, 1] },
                new svm_node() { index = 1, value = X[j, 2] }
            }).ToArray(),
            y = Enumerable.Range(0, points).Select(j => (double)f(X.Row(j))).ToArray(),
            l = points
        };
        var model = svm.svm_train(prob, new svm_parameter()
        {
            svm_type = (int)SvmType.C_SVC,
            kernel_type = (int)KernelType.LINEAR,
            C = 1000000,
            eps = 0.001,
            shrinking = 0
        });

        // Estimate P[f(Xtest) == h_svm(Xtest)] on another batch of random points.
        svm_node[] Xsvm = new svm_node[]
        {
            new svm_node() { index = 0, value = 1.0 },
            new svm_node() { index = 1, value = 1.0 }
        };
        matches = 0;
        for (int j = 0; j < TestPoints; j++)
        {
            Xtest[1] = rnd.NextDouble() * 2 - 1;
            Xsvm[0].value = Xtest[1];
            Xtest[2] = rnd.NextDouble() * 2 - 1;
            Xsvm[1].value = Xtest[2];
            if (f(Xtest) == (svm.svm_predict(model, Xsvm) > 0 ? 1 : -1))
            {
                matches++;
            }
        }
        double Psvm = (double)matches / TestPoints;

        svCount += model.l;
        if (Psvm >= Ppla)
        {
            svmWins++;
        }
    }

    return Tuple.Create((double)svmWins / experiments, (double)svCount / experiments);
}
/// <summary>
/// Computes the Range transform for a problem using DEFAULT_LOWER_BOUND and
/// DEFAULT_UPPER_BOUND as the target bounds.
/// </summary>
/// <param name="prob">The Problem to analyze</param>
/// <returns>The Range transform for the problem</returns>
public static RangeTransform Compute(svm_problem prob)
{
    // Delegate to the three-argument overload with the default bounds.
    RangeTransform transform = Compute(prob, DEFAULT_LOWER_BOUND, DEFAULT_UPPER_BOUND);
    return transform;
}
/// <summary>
/// SVM exercise walkthrough: plots three datasets, trains a linear C-SVC on the first,
/// evaluates the Gaussian kernel, trains RBF C-SVCs on the second and third datasets
/// and plots each decision boundary.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    if (!System.Console.IsOutputRedirected)
    {
        System.Console.Clear();
    }

    CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

    var V = Vector <double> .Build;

    //// =============== Part 1: Loading and Visualizing Data ================
    //  We start the exercise by first loading and visualizing the dataset.
    //  The following code will load the dataset and plot the data.
    //

    System.Console.WriteLine("Loading and Visualizing Data ...\n");

    // Load X and y from ex6data1 (path built with Path.Combine so it also
    // resolves on non-Windows systems)
    Dictionary <string, Matrix <double> > ms = MatlabReader.ReadAll <double>(System.IO.Path.Combine("data", "ex6data1.mat"));

    Matrix <double> X = ms["X"];            // 51 X 2
    Vector <double> y = ms["y"].Column(0);  // 51 X 1

    // Plot training data
    GnuPlot.HoldOn();
    PlotData(X, y);

    Pause();

    //// ==================== Part 2: Training Linear SVM ====================
    //  The following code will train a linear SVM on the dataset and plot the
    //  decision boundary learned.
    //

    System.Console.WriteLine("\nTraining Linear SVM ...\n");

    // You should try to change the C value below and see how the decision
    // boundary varies (e.g., try C = 1000)
    double C = 1.0;
    var linearKernel = KernelHelper.LinearKernel();

    List <List <double> > libSvmData = ConvertToLibSvmFormat(X, y);
    svm_problem prob = ProblemHelper.ReadProblem(libSvmData);
    var svc = new C_SVC(prob, linearKernel, C);

    PlotBoundary(X, svc);
    GnuPlot.HoldOff();

    System.Console.WriteLine();

    Pause();

    //// =============== Part 3: Implementing Gaussian Kernel ===============
    //  Evaluates the Gaussian kernel (GaussianKernel) on a fixed pair of
    //  vectors so the result can be compared with the expected value.
    //

    System.Console.WriteLine("\nEvaluating the Gaussian Kernel ...\n");

    double sigma = 2.0;
    double sim = GaussianKernel(
        V.DenseOfArray(new [] { 1.0, 2, 1 }),
        V.DenseOfArray(new [] { 0.0, 4, -1 }),
        sigma
    );

    System.Console.WriteLine("Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {0:f6} :\n\t{1:f6}\n(for sigma = 2, this value should be about 0.324652)\n", sigma, sim);

    Pause();

    //// =============== Part 4: Visualizing Dataset 2 ================
    //  The following code will load the next dataset and plot the data.
    //

    System.Console.WriteLine("Loading and Visualizing Data ...\n");

    // Load X and y from ex6data2
    ms = MatlabReader.ReadAll <double>(System.IO.Path.Combine("data", "ex6data2.mat"));

    X = ms["X"];            // 863 X 2
    y = ms["y"].Column(0);  // 863 X 1

    // Plot training data
    GnuPlot.HoldOn();
    PlotData(X, y);

    Pause();

    //// ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
    //  Trains the SVM classifier with an RBF kernel and plots its boundary.
    //

    System.Console.WriteLine("\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n");

    // SVM Parameters
    C = 1;
    sigma = 0.1;

    // The RBF kernel helper takes gamma, derived here from sigma
    double gamma = 1 / (2 * sigma * sigma);
    var rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

    libSvmData = ConvertToLibSvmFormat(X, y);
    prob = ProblemHelper.ReadProblem(libSvmData);
    svc = new C_SVC(prob, rbfKernel, C);

    PlotBoundary(X, svc);
    GnuPlot.HoldOff();

    Pause();

    double acc = svc.GetCrossValidationAccuracy(10);
    System.Console.WriteLine("\nCross Validation Accuracy: {0:f6}\n", acc);

    Pause();

    //// =============== Part 6: Visualizing Dataset 3 ================
    //  The following code will load the next dataset and plot the data.
    //

    System.Console.WriteLine("Loading and Visualizing Data ...\n");

    // Load X, y, Xval and yval from ex6data3
    ms = MatlabReader.ReadAll <double>(System.IO.Path.Combine("data", "ex6data3.mat"));

    Matrix <double> Xval;
    Vector <double> yval;

    X = ms["X"];                  // 211 X 2
    y = ms["y"].Column(0);        // 211 X 1
    Xval = ms["Xval"];            // 200 X 2
    yval = ms["yval"].Column(0);  // 200 X 1

    // Plot training data
    GnuPlot.HoldOn();
    PlotData(X, y);

    //// ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========
    //  This is a different dataset that you can use to experiment with. Try
    //  different values of C and sigma here.
    //

    // C and sigma are both replaced by the values Dataset3Params returns
    (C, sigma) = Dataset3Params(X, y, Xval, yval);

    gamma = 1 / (2 * sigma * sigma);
    rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

    libSvmData = ConvertToLibSvmFormat(X, y);
    prob = ProblemHelper.ReadProblem(libSvmData);
    svc = new C_SVC(prob, rbfKernel, C);

    PlotBoundary(X, svc);
    GnuPlot.HoldOff();

    Pause();
}
/// <summary>
/// Builds an svm_problem from per-document word weights and, as a side effect, writes the
/// same data in "label index:value ..." text form to SVM_TRAIN_FILE_NAME.
/// Documents with no word found in <paramref name="dictionary"/> are skipped entirely.
/// </summary>
/// <param name="docWordDicList">One word-to-value map per document.</param>
/// <param name="dictionary">Maps each known word to its feature index.</param>
/// <param name="trainingAnswer">Label of each document, parallel to <paramref name="docWordDicList"/>.</param>
/// <returns>The problem holding one node array (sorted by feature index) and one label per kept document.</returns>
private svm_problem gen_svm_training_data(List <Dictionary <string, double> > docWordDicList, Dictionary <string, int> dictionary, List <int> trainingAnswer)
{
    var vy = new List <double>();       // label list
    var vx = new List <svm_node[]>();   // node list

    // The training file is machine-readable, so numbers are written culture-independently
    // (a decimal comma from the current culture would corrupt the "index:value" pairs).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // 'using' guarantees the file is closed even if an exception occurs mid-way.
    using (StreamWriter file = new StreamWriter(SVM_TRAIN_FILE_NAME))
    {
        for (int i = 0; i < docWordDicList.Count; i++)
        {
            int label = trainingAnswer[i];

            // Collect (feature index, value) for every word of this document that is in the dictionary.
            var nodeList = new List <KeyValuePair <int, double> >();
            foreach (KeyValuePair <string, double> wordEntry in docWordDicList[i])
            {
                int featureIndex;
                if (dictionary.TryGetValue(wordEntry.Key, out featureIndex))
                {
                    nodeList.Add(new KeyValuePair <int, double>(featureIndex, wordEntry.Value));
                }
            }

            if (nodeList.Count == 0)
            {
                continue;
            }

            // Nodes are ordered by ascending feature index.
            nodeList.Sort((firstPair, nextPair) => firstPair.Key.CompareTo(nextPair.Key));

            double labelValue = (double)label;
            var x = new List <svm_node>(nodeList.Count);
            var trainLine = new System.Text.StringBuilder(label.ToString(invariant));

            foreach (KeyValuePair <int, double> node in nodeList)
            {
                x.Add(new svm_node()
                {
                    index = node.Key,
                    value = node.Value,
                });
                Console.WriteLine(@"## train data - label:{2}, index:{0}, value:{1}", node.Key, node.Value, labelValue);

                // Sango : just for TEST to output the node to file
                trainLine.Append(' ')
                         .Append(node.Key.ToString(invariant))
                         .Append(':')
                         .Append(node.Value.ToString(invariant));
            }

            file.WriteLine(trainLine.ToString());
            vy.Add(labelValue);     // label
            vx.Add(x.ToArray());
        }
    }

    return new svm_problem()
    {
        l = vy.Count,
        x = vx.ToArray(),
        y = vy.ToArray()
    };
}
/// <summary>
/// Converts the training data to a LIBSVM problem, configures the kernel type and gamma
/// on <c>Param</c>, trains the model and stores it in <c>Model</c>.
/// </summary>
/// <param name="x">Training samples, one per row. Replaced by the computed kernel matrix when a kernel function is set.</param>
/// <param name="y">Target values, one per row of <paramref name="x"/>.</param>
/// <exception cref="ArgumentException">The computed kernel matrix is not square.</exception>
private void FitInternal(Matrix <double> x, Vector <double> y)
{
    bool hasKernelFunction = this.Kernel.KernelFunction != null;
    if (hasKernelFunction)
    {
        // Keep a reference to the original X: predict needs it to compute the kernel.
        // TODO: add keyword copy to copy on demand
        this.xFit = x;
        x = this.ComputeKernel(x);

        bool isSquare = x.RowCount == x.ColumnCount;
        if (!isSquare)
        {
            throw new ArgumentException("X.RowCount should be equal to X.ColumnCount");
        }
    }

    var problem = new svm_problem
    {
        l = x.RowCount,
        x = new svm_node[x.RowCount][]
    };

    foreach (var row in x.RowEnumerator())
    {
        int rowIndex = row.Item1;
        var cells = row.Item2.GetIndexedEnumerator();
        svm_node[] nodes;

        if (Kernel.LibSvmKernel != LibSvmKernel.Precomputed)
        {
            // Regular kernels: feature indices are used as they come (zero based).
            nodes = cells
                .Select(cell => new svm_node { index = cell.Item1, value = cell.Item2 })
                .ToArray();
        }
        else
        {
            // Precomputed kernel: node 0 carries the one-based row number,
            // followed by the kernel values with one-based indices.
            var header = new[] { new svm_node { index = 0, value = rowIndex + 1 } };
            var kernelValues = cells.Select(cell => new svm_node { index = cell.Item1 + 1, value = cell.Item2 });
            nodes = header.Concat(kernelValues).ToArray();
        }

        problem.x[rowIndex] = nodes;
    }

    problem.y = y.ToArray();

    var kernelType = this.Kernel.LibSvmKernel;
    this.Param.kernel_type = (int)kernelType;

    // Poly and Rbf kernels fall back to 1 / (number of columns) when no custom gamma was given.
    bool gammaKernel = kernelType == LibSvmKernel.Poly || kernelType == LibSvmKernel.Rbf;
    this.Param.gamma = (gammaKernel && this.Gamma == 0)
        ? 1.0 / x.ColumnCount
        : this.Gamma;

    this.Model = svm.svm_train(problem, this.Param);
}