/// <summary>
///   Learns the linear relation f(x, y) = 2x + y from eight sparse samples using
///   <c>LinearRegressionNewtonMethod</c>, then verifies the machine's predictions.
/// </summary>
public void learn_sparse_test()
{
    // Fixed seed so the learning run is reproducible across test executions.
    Accord.Math.Random.Generator.Seed = 0;

    #region doc_learn_sparse
    // Example regression problem. Suppose we are trying
    // to model the following equation: f(x, y) = 2x + y

    Sparse<double>[] inputs = // (x, y)
    {
        Sparse.FromDense(new double[] { 0,  1 }), // 2*0 + 1 =  1
        Sparse.FromDense(new double[] { 4,  3 }), // 2*4 + 3 = 11
        Sparse.FromDense(new double[] { 8, -8 }), // 2*8 - 8 =  8
        Sparse.FromDense(new double[] { 2,  2 }), // 2*2 + 2 =  6
        Sparse.FromDense(new double[] { 6,  1 }), // 2*6 + 1 = 13
        Sparse.FromDense(new double[] { 5,  4 }), // 2*5 + 4 = 14
        Sparse.FromDense(new double[] { 9,  1 }), // 2*9 + 1 = 19
        Sparse.FromDense(new double[] { 1,  6 }), // 2*1 + 6 =  8
    };

    double[] outputs = // f(x, y)
    {
        1, 11, 8, 6, 13, 14, 19, 8
    };

    // Create the linear regression coordinate descent teacher.
    // A very large Complexity approximates a hard-margin regression fit.
    var learn = new LinearRegressionNewtonMethod<Linear, Sparse<double>>()
    {
        Complexity = 10000000,
        Epsilon = 1e-10,
        Tolerance = 1e-15,
    };

    // Run the learning algorithm
    var svm = learn.Learn(inputs, outputs);

    // Compute the answer for one particular example
    double fxy = svm.Score(inputs[0]); // 1.000

    // Check for correct answers
    double[] answers = svm.Score(inputs);
    #endregion

    // A linear machine should have been reduced to a single weighted vector.
    Assert.AreEqual(1, svm.Weights[0]);
    // The compressed support vector should hold the learned (a, b) = (2, 1).
    Assert.AreEqual(2, svm.SupportVectors[0][0], 1e-8);
    Assert.AreEqual(1, svm.SupportVectors[0][1], 1e-8);
    Assert.AreEqual(1.0, fxy, 1e-5);
    for (int i = 0; i < outputs.Length; i++)
    {
        Assert.AreEqual(outputs[i], answers[i], 1e-2);
    }
}
/// <summary>
///   Checks that Sparse.Parse is the inverse of Sparse{T}.ToString, with and
///   without the <c>insertValueAtBeginning</c> option.
/// </summary>
public void ParseTest()
{
    // Plain round-trip: ToString followed by Parse reproduces the vector.
    double[] dense = new double[] { 1, 2, 3, 0, 0, 6 };
    Sparse<double> expected = Sparse.FromDense(dense);
    Sparse<double> actual = Sparse.Parse(expected.ToString());
    Assert.AreEqual(expected, actual);

    // Same round-trip when the leading element is zero (and thus omitted).
    dense = new double[] { 0, 2, 3, 0, 0, 6 };
    expected = Sparse.FromDense(dense);
    actual = Sparse.Parse(expected.ToString());
    Assert.AreEqual(expected, actual);

    // Inserting a zero at the beginning must not change the parsed vector.
    dense = new double[] { 1, 2, 3, 0, 0, 6 };
    expected = Sparse.FromDense(dense);
    actual = Sparse.Parse(expected.ToString(), insertValueAtBeginning: 0);
    Assert.AreEqual(expected, actual);

    dense = new double[] { 0, 2, 3, 0, 0, 6 };
    expected = Sparse.FromDense(dense);
    actual = Sparse.Parse(expected.ToString(), insertValueAtBeginning: 0);
    Assert.AreEqual(expected, actual);

    // A non-zero inserted value becomes element 1 and shifts all indices by one.
    string text = Sparse.FromDense(new double[] { 1, 2, 3, 0, 0, 6 }).ToString();
    actual = Sparse.Parse(text, insertValueAtBeginning: 1);
    expected = Sparse.Parse("1:1 2:1 3:2 4:3 7:6");
    Assert.AreEqual(expected, actual);

    text = Sparse.FromDense(new double[] { 0, 2, 3, 0, 0, 6 }).ToString();
    actual = Sparse.Parse(text, insertValueAtBeginning: 42);
    expected = Sparse.Parse("1:42 3:2 4:3 7:6");
    Assert.AreEqual(expected, actual);
}
/// <summary>
///   Checks the LibSVM-style text rendering of sparse vectors: zero entries
///   are omitted and indices are 1-based.
/// </summary>
public void ToStringTest()
{
    Sparse<double> sparse = Sparse.FromDense(new double[] { 1, 2, 3, 0, 0, 6 });
    Assert.AreEqual("1:1 2:2 3:3 6:6", sparse.ToString());

    // Leading zeros are dropped as well; the first stored index here is 3.
    sparse = Sparse.FromDense(new double[] { 0, 0, 2, 3, 0, 0, 6 });
    Assert.AreEqual("3:2 4:3 7:6", sparse.ToString());
}
/// <summary>
///   Ensures a linear SVM can be trained when one of the sparse inputs is
///   the all-zero vector (which has an empty index list).
/// </summary>
public void sparse_zero_vector_test()
{
    // Create a linear-SVM learning method
    var learner = new LinearNewtonMethod<Linear, Sparse<double>>()
    {
        Tolerance = 1e-10,
        Complexity = 1e+10, // learn a hard-margin model
    };

    // Now suppose you have some points; note the last one is all zeros.
    Sparse<double>[] samples = Sparse.FromDense(new[]
    {
        new double[] { 1, 1, 2 },
        new double[] { 0, 1, 6 },
        new double[] { 1, 0, 8 },
        new double[] { 0, 0, 0 },
    });

    int[] labels = { 1, -1, 1, -1 };

    // Learn the support vector machine
    var machine = learner.Learn(samples, labels);

    // Compute the predicted points
    bool[] decisions = machine.Decide(samples);

    // And the classification error using
    double loss = new ZeroOneLoss(labels).Loss(decisions);

    Assert.AreEqual(3, machine.NumberOfInputs);
    Assert.AreEqual(1, machine.NumberOfOutputs);
    Assert.AreEqual(2, machine.NumberOfClasses);

    // A linear machine compresses into a single weighted support vector.
    Assert.AreEqual(1, machine.Weights.Length);
    Assert.AreEqual(1, machine.SupportVectors.Length);
    Assert.AreEqual(1.0, machine.Weights[0], 1e-6);
    Assert.AreEqual(2.0056922148257597, machine.SupportVectors[0][0], 1e-6);
    Assert.AreEqual(-0.0085361347231909836, machine.SupportVectors[0][1], 1e-6);
    Assert.AreEqual(0.0014225721169379331, machine.SupportVectors[0][2], 1e-6);
    Assert.AreEqual(0.0, loss);
}
/// <summary>
///   Saves this model to disk using LibSVM's model format.
/// </summary>
///
/// <param name="stream">The stream where the file should be written.</param>
///
public void Save(Stream stream)
{
    // NOTE: the writer is flushed but deliberately not disposed, so the
    // caller's stream remains open after the model has been written.
    StreamWriter writer = new StreamWriter(stream);

    // Header: solver name, number of classes and the class labels.
    writer.WriteLine("solver_type " + Solver.GetDescription().ToUpperInvariant());
    writer.WriteLine("nr_class " + NumberOfClasses);

    writer.Write("label");
    for (int i = 0; i < Labels.Length; i++)
    {
        writer.Write(" " + Labels[i]);
    }
    writer.WriteLine();

    writer.WriteLine("nr_feature " + NumberOfInputs);

    // G17 + invariant culture gives a culture-independent, round-trippable double.
    writer.WriteLine("bias " + Bias.ToString("G17", System.Globalization.CultureInfo.InvariantCulture));

    if (this.Vectors == null)
    {
        // Compact linear model: one weight per feature under the "w" section.
        writer.WriteLine("w");
        for (int i = 0; i < Weights.Length; i++)
        {
            writer.WriteLine(Weights[i].ToString("G17", System.Globalization.CultureInfo.InvariantCulture) + " ");
        }
    }
    else
    {
        // Kernel model: each line is "alpha index:value index:value ..." under "SV".
        writer.WriteLine("SV");
        for (int i = 0; i < Vectors.Length; i++)
        {
            string alpha = Weights[i].ToString("G17", System.Globalization.CultureInfo.InvariantCulture);
            string values = Sparse.FromDense(Vectors[i]).ToString();
            writer.WriteLine(alpha + " " + values);
        }
    }

    writer.Flush();
}
/// <summary>
///   Learns the XOR function with a Gaussian-kernel SVM over sparse inputs.
/// </summary>
public void learn_sparse_kernel()
{
    #region doc_xor_sparse
    // As an example, we will try to learn a decision machine
    // that can replicate the "exclusive-or" logical function:

    Sparse<double>[] inputs =
    {
        Sparse.FromDense(new double[] { 0, 0 }), // the XOR function takes two booleans
        Sparse.FromDense(new double[] { 0, 1 }), // and computes their exclusive or: the
        Sparse.FromDense(new double[] { 1, 0 }), // output is true only if the two booleans
        Sparse.FromDense(new double[] { 1, 1 })  // are different
    };

    int[] xor = // this is the output of the xor function
    {
        0, // 0 xor 0 = 0 (inputs are equal)
        1, // 0 xor 1 = 1 (inputs are different)
        1, // 1 xor 0 = 1 (inputs are different)
        0, // 1 xor 1 = 0 (inputs are equal)
    };

    // Now, we can create the sequential minimal optimization teacher,
    // letting it estimate both the complexity and the kernel parameters.
    var teacher = new SequentialMinimalOptimization<Gaussian, Sparse<double>>()
    {
        UseComplexityHeuristic = true,
        UseKernelEstimation = true
    };

    // And then we can obtain a trained SVM by calling its Learn method
    var machine = teacher.Learn(inputs, xor);

    // Finally, we can obtain the decisions predicted by the machine:
    bool[] decisions = machine.Decide(inputs);
    #endregion

    // A Gaussian kernel is able to represent XOR exactly.
    Assert.AreEqual(decisions, Classes.Decide(xor));
}
/// <summary>
///   Transforms a tokenized document into a TF-IDF-weighted sparse vector.
/// </summary>
///
/// <param name="x">The sequence of tokens (words) in the document.</param>
/// <param name="sparse">On output, receives the resulting sparse vector.</param>
/// <param name="work">A dense scratch buffer used to accumulate raw term counts.
///   Assumed to be zero-initialized and at least as long as the vocabulary —
///   TODO(review): confirm against callers.</param>
///
/// <returns>The same vector assigned to <paramref name="sparse"/>.</returns>
private Sparse<double> Transform(string[] x, out Sparse<double> sparse, double[] work)
{
    // Accumulate raw term counts for every token known to the codebook.
    IDictionary<string, int> codebook = bow.StringToCode;
    for (int j = 0; j < x.Length; j++)
    {
        int k;
        if (!codebook.TryGetValue(x[j], out k))
            continue; // out-of-vocabulary token; ignore it

        work[k]++;
    }

    sparse = Sparse.FromDense(work);

    // Apply the configured term-frequency weighting scheme in place.
    switch (tf)
    {
        case TermFrequency.Binary:
            // Presence/absence only: every stored count collapses to 1.
            // (Fixed: the original had a duplicated self-assignment
            //  "sparse.Values[j] = sparse.Values[j] = 1".)
            for (int j = 0; j < sparse.Values.Length; j++)
                sparse.Values[j] = 1;
            break;

        case TermFrequency.Default:
            // Raw counts are used as-is.
            break;

        case TermFrequency.Log:
            // Dampened counts: 1 + log(count). Counts stored in a sparse
            // vector are >= 1, so the logarithm is well-defined.
            for (int j = 0; j < sparse.Values.Length; j++)
                sparse.Values[j] = 1 + Math.Log(sparse.Values[j]);
            break;

        case TermFrequency.DoubleNormalization:
            // Normalize by the most frequent term to reduce bias toward
            // long documents: 0.5 + 0.5 * (count / maxCount).
            double max = sparse.Values.Max();
            for (int j = 0; j < sparse.Values.Length; j++)
                sparse.Values[j] = 0.5 + 0.5 * (sparse.Values[j] / max);
            break;

        default:
            throw new InvalidOperationException("Unknown TermFrequency: {0}".Format(tf));
    }

    // Multiply each term weight by its inverse document frequency.
    for (int j = 0; j < sparse.Values.Length; j++)
    {
        double a = sparse.Values[j];
        int k = sparse.Indices[j];
        double v = a * inverseDocumentFrequency[k];
#if DEBUG
        // Sanity check: IDF scaling should never produce NaN or infinity.
        if (Double.IsNaN(v) || Double.IsInfinity(v))
            throw new InvalidOperationException("TF-IDF weighting produced a non-finite value.");
#endif
        sparse.Values[j] = v;
    }

    return sparse;
}
/// <summary>
///   Shows that a linear SVM cannot learn XOR over sparse inputs (predicting a
///   single class instead), but can learn the linearly-separable OR function.
/// </summary>
public void learn_linear_sparse()
{
    #region doc_xor_sparse
    // As an example, we will try to learn a linear machine that can
    // replicate the "exclusive-or" logical function. However, since we
    // will be using a linear SVM, we will not be able to solve this
    // problem perfectly as the XOR is a non-linear classification problem:
    Sparse<double>[] inputs =
    {
        Sparse.FromDense(new double[] { 0, 0 }), // the XOR function takes two booleans
        Sparse.FromDense(new double[] { 0, 1 }), // and computes their exclusive or: the
        Sparse.FromDense(new double[] { 1, 0 }), // output is true only if the two booleans
        Sparse.FromDense(new double[] { 1, 1 })  // are different
    };

    int[] xor = // this is the output of the xor function
    {
        0, // 0 xor 0 = 0 (inputs are equal)
        1, // 0 xor 1 = 1 (inputs are different)
        1, // 1 xor 0 = 1 (inputs are different)
        0, // 1 xor 1 = 0 (inputs are equal)
    };

    // Now, we can create the sequential minimal optimization teacher
    var learn = new LinearNewtonMethod<Linear, Sparse<double>>()
    {
        UseComplexityHeuristic = true,
        UseKernelEstimation = false
    };

    // And then we can obtain a trained SVM by calling its Learn method
    var svm = learn.Learn(inputs, xor);

    // Finally, we can obtain the decisions predicted by the machine:
    bool[] prediction = svm.Decide(inputs);
    #endregion

    // XOR is not linearly separable: the machine degenerates to predicting
    // a single class for every input.
    Assert.AreEqual(prediction[0], false);
    Assert.AreEqual(prediction[1], false);
    Assert.AreEqual(prediction[2], false);
    Assert.AreEqual(prediction[3], false);

    int[] or = // this is the output of the (linearly separable) OR function
    {
        0, // 0 or 0 = 0 (both inputs are false)
        1, // 0 or 1 = 1 (at least one input is true)
        1, // 1 or 0 = 1 (at least one input is true)
        1, // 1 or 1 = 1 (at least one input is true)
    };

    learn = new LinearNewtonMethod<Linear, Sparse<double>>()
    {
        Complexity = 1e+8,
        UseKernelEstimation = false
    };

    svm = learn.Learn(inputs, or);

    prediction = svm.Decide(inputs);

    // Sanity check: the sparse representation stores only non-zero entries,
    // so each vector's index list has as many entries as it has non-zeros.
    Assert.AreEqual(0, inputs[0].Indices.Length);
    Assert.AreEqual(1, inputs[1].Indices.Length);
    Assert.AreEqual(1, inputs[2].Indices.Length);
    Assert.AreEqual(2, inputs[3].Indices.Length);

    // OR is linearly separable, so all four points are classified correctly.
    Assert.AreEqual(prediction[0], false);
    Assert.AreEqual(prediction[1], true);
    Assert.AreEqual(prediction[2], true);
    Assert.AreEqual(prediction[3], true);
}
/// <summary>
///   Learns a hard-margin linear SVM regression on sparse inputs lying on the
///   plane z = 1, then converts it to a MultipleLinearRegression and checks
///   the recovered coefficients, intercept and fit quality.
/// </summary>
public void linear_regression_sparse_test()
{
    #region doc_linreg_sparse
    // Declare some training data. This is exactly the same
    // data used in the MultipleLinearRegression documentation page

    // We will try to model a plane as an equation in the form
    // "ax + by + c = z". We have two input variables (x and y)
    // and we will be trying to find two parameters a and b and
    // an intercept term c.

    // Create a linear-SVM learning method
    var teacher = new LinearNewtonMethod<Linear, Sparse<double>>()
    {
        Tolerance = 1e-10,
        Complexity = 1e+10, // learn a hard-margin model
    };

    // Now suppose you have some points
    Sparse<double>[] inputs = Sparse.FromDense(new[]
    {
        new double[] { 1, 1 },
        new double[] { 0, 1 },
        new double[] { 1, 0 },
        new double[] { 0, 0 },
    });

    // located in the same Z (z = 1)
    double[] outputs = { 1, 1, 1, 1 };

    // Learn the support vector machine
    var svm = teacher.Learn(inputs, outputs);

    // Convert the svm to logistic regression
    var regression = (MultipleLinearRegression)svm;

    // As result, we will be given the following:
    double a = regression.Weights[0]; // a = 0
    double b = regression.Weights[1]; // b = 0
    double c = regression.Intercept;  // c = 1

    // This is the plane described by the equation
    // ax + by + c = z => 0x + 0y + 1 = z => 1 = z.

    // We can compute the predicted points using
    double[] predicted = regression.Transform(inputs.ToDense());

    // And the squared error loss using
    double error = new SquareLoss(outputs).Loss(predicted);
    #endregion

    Assert.AreEqual(2, regression.NumberOfInputs);
    Assert.AreEqual(1, regression.NumberOfOutputs);

    Assert.AreEqual(0.0, a, 1e-6);
    Assert.AreEqual(0.0, b, 1e-6);
    Assert.AreEqual(1.0, c, 1e-6);
    Assert.AreEqual(0.0, error, 1e-6);

    // Compute (legacy API) and Transform must agree on the same inputs.
    double[] expected = regression.Compute(inputs.ToDense());
    double[] actual = regression.Transform(inputs.ToDense());
    Assert.IsTrue(expected.IsEqual(actual, 1e-10));

    // A perfect fit gives a coefficient of determination of exactly 1.
    double r = regression.CoefficientOfDetermination(inputs.ToDense(), outputs);
    Assert.AreEqual(1.0, r);
}
/// <summary>
///   Explicit interface implementation: transforms the tokenized input
///   through the dense pipeline and returns it in sparse form.
/// </summary>
Sparse<double> ITransform<string[], Sparse<double>>.Transform(string[] input)
{
    double[] dense = Transform(input);
    return Sparse.FromDense(dense);
}
/// <summary>
///   Learns an L1-regularized probabilistic SVM (coordinate descent) on sparse
///   inputs, converts it to a LogisticRegression, and checks predictions,
///   odds ratios and coefficients against reference values.
/// </summary>
public void logistic_regression_sparse_test()
{
    #region doc_logreg_sparse
    // Declare some training data. This is exactly the same
    // data used in the LogisticRegression documentation page

    // Suppose we have the following data about some patients.
    // The first variable is continuous and represent patient
    // age. The second variable is dichotomic and give whether
    // they smoke or not (This is completely fictional data).

    // We also know if they have had lung cancer or not, and
    // we would like to know whether smoking has any connection
    // with lung cancer (This is completely fictional data).

    Sparse<double>[] input =
    {
        //          age, smokes?, had cancer?
        Sparse.FromDense(new double[] { 55, 0 }), // false - no cancer
        Sparse.FromDense(new double[] { 28, 0 }), // false
        Sparse.FromDense(new double[] { 65, 1 }), // false
        Sparse.FromDense(new double[] { 46, 0 }), // true  - had cancer
        Sparse.FromDense(new double[] { 86, 1 }), // true
        Sparse.FromDense(new double[] { 56, 1 }), // true
        Sparse.FromDense(new double[] { 85, 0 }), // false
        Sparse.FromDense(new double[] { 33, 0 }), // false
        Sparse.FromDense(new double[] { 21, 1 }), // false
        Sparse.FromDense(new double[] { 42, 1 }), // true
    };

    double[] output = // Whether each patient had lung cancer or not
    {
        0, 0, 0, 1, 1, 1, 0, 0, 0, 1
    };

    // Create the L1-regularization learning algorithm
    var teacher = new ProbabilisticCoordinateDescent<Linear, Sparse<double>>()
    {
        Tolerance = 1e-10,
        Complexity = 1e+10, // learn a hard-margin model
    };

    // Learn the L1-regularized machine
    var svm = teacher.Learn(input, output);

    // Convert the svm to logistic regression
    var regression = (LogisticRegression)svm;

    // Compute the predicted outcome for inputs
    bool[] predicted = regression.Decide(input.ToDense(regression.NumberOfInputs));

    // Compute log-likelihood scores for the outputs
    double[] scores = regression.LogLikelihood(input.ToDense(regression.NumberOfInputs));

    // Compute odds-ratio as in the LogisticRegression example
    double ageOdds = regression.GetOddsRatio(1);   // 1.0208597029158772
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    // Compute the classification error as in SVM example
    double error = new ZeroOneLoss(output).Loss(predicted);
    #endregion

    var rsvm = (SupportVectorMachine)regression;
    Assert.AreEqual(2, rsvm.NumberOfInputs);
    Assert.AreEqual(2, rsvm.NumberOfOutputs); // TODO: Maybe should be 1 rather than 2

    // The SVM scores and the regression log-likelihoods must agree.
    double[] svmpred = svm.Score(input);
    Assert.IsTrue(scores.IsEqual(svmpred, 1e-10));
    Assert.AreEqual(0.2, error);
    Assert.AreEqual(1.0208597029158772, ageOdds, 1e-4);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-4);

    // Coefficients[0] holds the intercept; 1 and 2 are age and smoking.
    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-8);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);
}
/// <summary>
///   Learns a probabilistic SVM via dual coordinate descent on sparse inputs,
///   converts it to a LogisticRegression, and checks predictions, odds ratios
///   and coefficients against reference values.
/// </summary>
public void logistic_regression_sparse_test()
{
    // Fixed seed so the learning run is reproducible across test executions.
    Accord.Math.Random.Generator.Seed = 0;

    #region doc_logreg_sparse
    // Declare some training data. This is exactly the same
    // data used in the LogisticRegression documentation page

    // Suppose we have the following data about some patients.
    // The first variable is continuous and represent patient
    // age. The second variable is dichotomic and give whether
    // they smoke or not (This is completely fictional data).

    // We also know if they have had lung cancer or not, and
    // we would like to know whether smoking has any connection
    // with lung cancer (This is completely fictional data).

    Sparse<double>[] input =
    {
        //          age, smokes?, had cancer?
        Sparse.FromDense(new double[] { 55, 0 }), // false - no cancer
        Sparse.FromDense(new double[] { 28, 0 }), // false
        Sparse.FromDense(new double[] { 65, 1 }), // false
        Sparse.FromDense(new double[] { 46, 0 }), // true  - had cancer
        Sparse.FromDense(new double[] { 86, 1 }), // true
        Sparse.FromDense(new double[] { 56, 1 }), // true
        Sparse.FromDense(new double[] { 85, 0 }), // false
        Sparse.FromDense(new double[] { 33, 0 }), // false
        Sparse.FromDense(new double[] { 21, 1 }), // false
        Sparse.FromDense(new double[] { 42, 1 }), // true
    };

    double[] output = // Whether each patient had lung cancer or not
    {
        0, 0, 0, 1, 1, 1, 0, 0, 0, 1
    };

    // Create the probabilistic-SVM learning algorithm
    var teacher = new ProbabilisticDualCoordinateDescent<Linear, Sparse<double>>()
    {
        Tolerance = 1e-10,
        Complexity = 1e+10, // learn a hard-margin model
    };

    // Learn the support vector machine
    var svm = teacher.Learn(input, output);

    // Convert the svm to logistic regression
    var regression = (LogisticRegression)svm;

    // Compute the predicted outcome for inputs
    bool[] predicted = regression.Decide(input.ToDense(regression.NumberOfInputs));

    // Compute probability scores for the outputs
    double[] scores = regression.Score(input.ToDense(regression.NumberOfInputs));

    // Compute odds-ratio as in the LogisticRegression example
    double ageOdds = regression.GetOddsRatio(1);   // 1.0430443799578411
    double smokeOdds = regression.GetOddsRatio(2); // 7.2414593749145508

    // Compute the classification error as in SVM example
    double error = new ZeroOneLoss(output).Loss(predicted);
    #endregion

    var rsvm = (SupportVectorMachine)regression;
    Assert.AreEqual(2, rsvm.NumberOfInputs);
    Assert.AreEqual(2, rsvm.NumberOfOutputs);

    // The SVM probabilities and the regression scores must agree.
    double[] svmpred = svm.Probability(input);
    Assert.IsTrue(scores.IsEqual(svmpred, 1e-10));
    Assert.AreEqual(0.4, error);
    Assert.AreEqual(1.0430443799578411, ageOdds, 1e-4);
    Assert.AreEqual(7.2414593749145508, smokeOdds, 1e-4);

    // Coefficients[0] holds the intercept; 1 and 2 are age and smoking.
    Assert.AreEqual(-21.4120677536517, regression.Intercept, 1e-8);
    Assert.AreEqual(-21.4120677536517, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.042143725408546939, regression.Coefficients[1], 1e-8);
    Assert.AreEqual(1.9798227572056906, regression.Coefficients[2], 1e-8);
}
/// <summary>
///   Writes the given feature vector and associated output label/value to the file.
/// </summary>
///
/// <param name="feature">The feature vector to be written.</param>
/// <param name="output">The output value to be written.</param>
/// <param name="comment">An optional comment describing the feature.</param>
///
public void Write(double[] feature, double output, string comment)
    => Write(Sparse.FromDense(feature), output, comment);
/// <summary>
///   Writes the given feature vector and associated output label/value to the file.
/// </summary>
///
/// <param name="feature">The feature vector to be written.</param>
/// <param name="output">The output value to be written.</param>
///
public void Write(double[] feature, bool output)
    => Write(Sparse.FromDense(feature), output);