Exemple #1
0
        public void learn_sparse_test()
        {
            Accord.Math.Random.Generator.Seed = 0;

            #region doc_learn_sparse
            // Example regression problem: we would like to model
            // the linear equation f(x, y) = 2x + y

            Sparse<double>[] inputs =                     // (x, y)
            {
                Sparse.FromDense(new double[] { 0,  1 }), // 2*0 + 1 =  1
                Sparse.FromDense(new double[] { 4,  3 }), // 2*4 + 3 = 11
                Sparse.FromDense(new double[] { 8, -8 }), // 2*8 - 8 =  8
                Sparse.FromDense(new double[] { 2,  2 }), // 2*2 + 2 =  6
                Sparse.FromDense(new double[] { 6,  1 }), // 2*6 + 1 = 13
                Sparse.FromDense(new double[] { 5,  4 }), // 2*5 + 4 = 14
                Sparse.FromDense(new double[] { 9,  1 }), // 2*9 + 1 = 19
                Sparse.FromDense(new double[] { 1,  6 }), // 2*1 + 6 =  8
            };

            double[] outputs = { 1, 11, 8, 6, 13, 14, 19, 8 }; // f(x, y)

            // Create the Newton-method support vector regression teacher
            var teacher = new LinearRegressionNewtonMethod<Linear, Sparse<double>>()
            {
                Complexity = 10000000,
                Epsilon = 1e-10,
                Tolerance = 1e-15,
            };

            // Run the learning algorithm to obtain a trained machine
            var machine = teacher.Learn(inputs, outputs);

            // Compute the answer for one particular example
            double fxy = machine.Score(inputs[0]); // 1.000

            // And the answers for every training example
            double[] answers = machine.Score(inputs);
            #endregion

            // The trained machine should hold a single weight/support vector pair
            Assert.AreEqual(1, machine.Weights[0]);
            Assert.AreEqual(2, machine.SupportVectors[0][0], 1e-8);
            Assert.AreEqual(1, machine.SupportVectors[0][1], 1e-8);

            Assert.AreEqual(1.0, fxy, 1e-5);

            // All training points should be reproduced within tolerance
            for (int i = 0; i < outputs.Length; i++)
                Assert.AreEqual(outputs[i], answers[i], 1e-2);
        }
        public void ParseTest()
        {
            // Round-trip: dense -> sparse -> string -> sparse should preserve the vector.
            Sparse<double> expected = Sparse.FromDense(new double[] { 1, 2, 3, 0, 0, 6 });
            Sparse<double> actual = Sparse.Parse(expected.ToString());
            Assert.AreEqual(expected, actual);

            expected = Sparse.FromDense(new double[] { 0, 2, 3, 0, 0, 6 });
            actual = Sparse.Parse(expected.ToString());
            Assert.AreEqual(expected, actual);


            // Parsing with insertValueAtBeginning: 0 should yield the original vector.
            expected = Sparse.FromDense(new double[] { 1, 2, 3, 0, 0, 6 });
            actual = Sparse.Parse(expected.ToString(), insertValueAtBeginning: 0);
            Assert.AreEqual(expected, actual);

            expected = Sparse.FromDense(new double[] { 0, 2, 3, 0, 0, 6 });
            actual = Sparse.Parse(expected.ToString(), insertValueAtBeginning: 0);
            Assert.AreEqual(expected, actual);


            // Inserting a non-zero value prepends it and shifts the stored indices.
            string s = Sparse.FromDense(new double[] { 1, 2, 3, 0, 0, 6 }).ToString();
            actual = Sparse.Parse(s, insertValueAtBeginning: 1);
            expected = Sparse.Parse("1:1 2:1 3:2 4:3 7:6");
            Assert.AreEqual(expected, actual);

            s = Sparse.FromDense(new double[] { 0, 2, 3, 0, 0, 6 }).ToString();
            actual = Sparse.Parse(s, insertValueAtBeginning: 42);
            expected = Sparse.Parse("1:42 3:2 4:3 7:6");
            Assert.AreEqual(expected, actual);
        }
Exemple #3
0
        public void ToStringTest()
        {
            // The textual form lists 1-based "index:value" pairs, omitting zeros.
            Sparse<double> d = Sparse.FromDense(new double[] { 1, 2, 3, 0, 0, 6 });
            Assert.AreEqual("1:1 2:2 3:3 6:6", d.ToString());

            // Leading zeros are omitted as well; indices still count from 1.
            d = Sparse.FromDense(new double[] { 0, 0, 2, 3, 0, 0, 6 });
            Assert.AreEqual("3:2 4:3 7:6", d.ToString());
        }
Exemple #4
0
        public void sparse_zero_vector_test()
        {
            // Create a linear-SVM learning method (a huge C gives a hard margin)
            var learner = new LinearNewtonMethod<Linear, Sparse<double>>()
            {
                Tolerance = 1e-10,
                Complexity = 1e+10, // learn a hard-margin model
            };

            // Training points; note the last one is the all-zeros vector
            Sparse<double>[] inputs = Sparse.FromDense(new[]
            {
                new double[] { 1, 1, 2 },
                new double[] { 0, 1, 6 },
                new double[] { 1, 0, 8 },
                new double[] { 0, 0, 0 },
            });

            int[] outputs = { 1, -1, 1, -1 };

            // Learn the support vector machine
            var machine = learner.Learn(inputs, outputs);

            // Compute the predicted classes
            bool[] decisions = machine.Decide(inputs);

            // And the classification error
            double error = new ZeroOneLoss(outputs).Loss(decisions);

            Assert.AreEqual(3, machine.NumberOfInputs);
            Assert.AreEqual(1, machine.NumberOfOutputs);
            Assert.AreEqual(2, machine.NumberOfClasses);

            // A linear machine is stored as one weight and one support vector
            Assert.AreEqual(1, machine.Weights.Length);
            Assert.AreEqual(1, machine.SupportVectors.Length);

            Assert.AreEqual(1.0, machine.Weights[0], 1e-6);
            Assert.AreEqual(2.0056922148257597, machine.SupportVectors[0][0], 1e-6);
            Assert.AreEqual(-0.0085361347231909836, machine.SupportVectors[0][1], 1e-6);
            Assert.AreEqual(0.0014225721169379331, machine.SupportVectors[0][2], 1e-6);
            Assert.AreEqual(0.0, error);
        }
Exemple #5
0
        /// <summary>
        ///   Saves this model to disk using LibSVM's model format.
        /// </summary>
        ///
        /// <param name="stream">The stream where the file should be written.</param>
        ///
        /// <summary>
        ///   Saves this model to disk using LibSVM's model format.
        /// </summary>
        ///
        /// <param name="stream">The stream where the file should be written.</param>
        ///
        public void Save(Stream stream)
        {
            // The writer is flushed but not disposed, so the caller's
            // stream remains open after the model has been written.
            var writer = new StreamWriter(stream);
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Header: solver type, number of classes and the class labels
            writer.WriteLine("solver_type " + Solver.GetDescription().ToUpperInvariant());
            writer.WriteLine("nr_class " + NumberOfClasses);

            writer.Write("label");
            foreach (var label in Labels)
                writer.Write(" " + label);
            writer.WriteLine();

            writer.WriteLine("nr_feature " + NumberOfInputs);
            writer.WriteLine("bias " + Bias.ToString("G17", invariant));

            if (this.Vectors == null)
            {
                // Linear machine: only the weight vector is stored
                writer.WriteLine("w");

                foreach (double w in Weights)
                    writer.WriteLine(w.ToString("G17", invariant) + " ");
            }
            else
            {
                // Kernel machine: store each weight with its support vector
                writer.WriteLine("SV");

                for (int i = 0; i < Vectors.Length; i++)
                {
                    string alpha = Weights[i].ToString("G17", invariant);
                    string values = Sparse.FromDense(Vectors[i]).ToString();
                    writer.WriteLine(alpha + " " + values);
                }
            }

            writer.Flush();
        }
Exemple #6
0
        public void learn_sparse_kernel()
        {
            #region doc_xor_sparse
            // In this example, we will teach a kernel support vector
            // machine to replicate the "exclusive-or" logical function:

            Sparse<double>[] inputs =
            {
                Sparse.FromDense(new double[] { 0, 0 }), // the XOR function takes two booleans
                Sparse.FromDense(new double[] { 0, 1 }), // and computes their exclusive or: the
                Sparse.FromDense(new double[] { 1, 0 }), // output is true only if the two booleans
                Sparse.FromDense(new double[] { 1, 1 })  // are different
            };

            int[] xor = // this is the output of the xor function
            {
                0,      // 0 xor 0 = 0 (inputs are equal)
                1,      // 0 xor 1 = 1 (inputs are different)
                1,      // 1 xor 0 = 1 (inputs are different)
                0,      // 1 xor 1 = 0 (inputs are equal)
            };

            // Create a sequential minimal optimization teacher, letting
            // it estimate both the complexity and the kernel parameters
            var smo = new SequentialMinimalOptimization<Gaussian, Sparse<double>>()
            {
                UseComplexityHeuristic = true,
                UseKernelEstimation = true
            };

            // Obtain a trained SVM by calling the teacher's Learn method
            var machine = smo.Learn(inputs, xor);

            // Finally, query the machine for its decisions:
            bool[] prediction = machine.Decide(inputs);
            #endregion

            // The kernel machine should reproduce the XOR table exactly
            Assert.AreEqual(prediction, Classes.Decide(xor));
        }
Exemple #7
0
        /// <summary>
        ///   Computes the weighted term-frequency representation of the
        ///   given tokens and scales it by the inverse document frequency.
        /// </summary>
        ///
        /// <param name="x">The tokens to be transformed.</param>
        /// <param name="sparse">On output, the resulting sparse feature vector.</param>
        /// <param name="work">A dense scratch buffer used to accumulate raw term counts.</param>
        ///
        /// <returns>The same vector passed back through <paramref name="sparse"/>.</returns>
        ///
        private Sparse<double> Transform(string[] x, out Sparse<double> sparse, double[] work)
        {
            IDictionary<string, int> codebook = bow.StringToCode;

            // Accumulate raw counts, skipping tokens absent from the codebook
            for (int j = 0; j < x.Length; j++)
            {
                int k;
                if (codebook.TryGetValue(x[j], out k))
                    work[k]++;
            }

            sparse = Sparse.FromDense(work);

            // Apply the selected term-frequency weighting scheme
            switch (tf)
            {
            case TermFrequency.Binary:
                // Fixed: the original contained a redundant double assignment
                // ("sparse.Values[j] = sparse.Values[j] = 1") on this line.
                for (int j = 0; j < sparse.Values.Length; j++)
                    sparse.Values[j] = 1;
                break;

            case TermFrequency.Default:
                // Raw counts are used as-is
                break;

            case TermFrequency.Log:
                for (int j = 0; j < sparse.Values.Length; j++)
                    sparse.Values[j] = 1 + Math.Log(sparse.Values[j]);
                break;

            case TermFrequency.DoubleNormalization:
                double max = sparse.Values.Max();
                for (int j = 0; j < sparse.Values.Length; j++)
                    sparse.Values[j] = 0.5 + 0.5 * (sparse.Values[j] / max);
                break;

            default:
                throw new InvalidOperationException("Unknown TermFrequency: {0}".Format(tf));
            }

            // Multiply each stored term frequency by its inverse document frequency
            for (int j = 0; j < sparse.Values.Length; j++)
            {
                double a = sparse.Values[j];
                int k = sparse.Indices[j];
                double v = a * inverseDocumentFrequency[k];

#if DEBUG
                // Debug-only sanity check for invalid weighting results
                if (Double.IsNaN(v) || Double.IsInfinity(v))
                    throw new Exception();
#endif
                sparse.Values[j] = v;
            }

            return sparse;
        }
Exemple #8
0
        // Trains linear machines on sparse XOR data (not linearly separable,
        // so every prediction collapses to false) and then on OR data,
        // which a linear machine can classify correctly.
        public void learn_linear_sparse()
        {
            #region doc_xor_sparse
            // As an example, we will try to learn a linear machine  that can
            // replicate the "exclusive-or" logical function. However, since we
            // will be using a linear SVM, we will not be able to solve this
            // problem perfectly as the XOR is a non-linear classification problem:
            Sparse <double>[] inputs =
            {
                Sparse.FromDense(new double[] { 0, 0 }), // the XOR function takes two booleans
                Sparse.FromDense(new double[] { 0, 1 }), // and computes their exclusive or: the
                Sparse.FromDense(new double[] { 1, 0 }), // output is true only if the two booleans
                Sparse.FromDense(new double[] { 1, 1 })  // are different
            };

            int[] xor = // this is the output of the xor function
            {
                0,      // 0 xor 0 = 0 (inputs are equal)
                1,      // 0 xor 1 = 1 (inputs are different)
                1,      // 1 xor 0 = 1 (inputs are different)
                0,      // 1 xor 1 = 0 (inputs are equal)
            };

            // Now, we can create the sequential minimal optimization teacher
            var learn = new LinearNewtonMethod <Linear, Sparse <double> >()
            {
                UseComplexityHeuristic = true,
                UseKernelEstimation    = false
            };

            // And then we can obtain a trained SVM by calling its Learn method
            var svm = learn.Learn(inputs, xor);

            // Finally, we can obtain the decisions predicted by the machine:
            bool[] prediction = svm.Decide(inputs);
            #endregion

            // As stated above, a linear machine cannot solve XOR perfectly:
            // here it ends up predicting false for every input.
            Assert.AreEqual(prediction[0], false);
            Assert.AreEqual(prediction[1], false);
            Assert.AreEqual(prediction[2], false);
            Assert.AreEqual(prediction[3], false);


            int[] or = // this is the output of the or function
            {
                0,     // 0 or 0 = 0 (both inputs are false)
                1,     // 0 or 1 = 1 (at least one input is true)
                1,     // 1 or 0 = 1 (at least one input is true)
                1,     // 1 or 1 = 1 (at least one input is true)
            };


            learn = new LinearNewtonMethod <Linear, Sparse <double> >()
            {
                Complexity          = 1e+8,
                UseKernelEstimation = false
            };

            svm = learn.Learn(inputs, or);

            prediction = svm.Decide(inputs);

            // Sparse vectors only store non-zero entries, so the number of
            // stored indices equals the number of non-zeros in each input.
            Assert.AreEqual(0, inputs[0].Indices.Length);
            Assert.AreEqual(1, inputs[1].Indices.Length);
            Assert.AreEqual(1, inputs[2].Indices.Length);
            Assert.AreEqual(2, inputs[3].Indices.Length);

            // The OR function is linearly separable, so the machine is exact.
            Assert.AreEqual(prediction[0], false);
            Assert.AreEqual(prediction[1], true);
            Assert.AreEqual(prediction[2], true);
            Assert.AreEqual(prediction[3], true);
        }
Exemple #9
0
        public void linear_regression_sparse_test()
        {
            #region doc_linreg_sparse
            // Declare some training data. This is exactly the same
            // data used in the MultipleLinearRegression documentation page

            // We will try to model a plane as an equation in the form
            // "ax + by + c = z". We have two input variables (x and y)
            // and we will be trying to find two parameters a and b and
            // an intercept term c.

            // Create a linear-SVM learning method
            var learner = new LinearNewtonMethod<Linear, Sparse<double>>()
            {
                Tolerance = 1e-10,
                Complexity = 1e+10, // learn a hard-margin model
            };

            // Now suppose you have some points
            Sparse<double>[] inputs = Sparse.FromDense(new[]
            {
                new double[] { 1, 1 },
                new double[] { 0, 1 },
                new double[] { 1, 0 },
                new double[] { 0, 0 },
            });

            // all located at the same height (z = 1)
            double[] outputs = { 1, 1, 1, 1 };

            // Learn the support vector machine
            var machine = learner.Learn(inputs, outputs);

            // Convert the svm to a multiple linear regression
            var regression = (MultipleLinearRegression)machine;

            // As result, we will be given the following:
            double a = regression.Weights[0]; // a = 0
            double b = regression.Weights[1]; // b = 0
            double c = regression.Intercept;  // c = 1

            // This is the plane described by the equation
            // ax + by + c = z => 0x + 0y + 1 = z => 1 = z.

            // We can compute the predicted points using
            double[] predicted = regression.Transform(inputs.ToDense());

            // And the squared error loss using
            double error = new SquareLoss(outputs).Loss(predicted);
            #endregion

            Assert.AreEqual(2, regression.NumberOfInputs);
            Assert.AreEqual(1, regression.NumberOfOutputs);

            Assert.AreEqual(0.0, a, 1e-6);
            Assert.AreEqual(0.0, b, 1e-6);
            Assert.AreEqual(1.0, c, 1e-6);
            Assert.AreEqual(0.0, error, 1e-6);

            // Compute and Transform should agree on the same inputs
            double[] expected = regression.Compute(inputs.ToDense());
            double[] actual = regression.Transform(inputs.ToDense());
            Assert.IsTrue(expected.IsEqual(actual, 1e-10));

            // A perfect fit yields a coefficient of determination of one
            double r = regression.CoefficientOfDetermination(inputs.ToDense(), outputs);
            Assert.AreEqual(1.0, r);
        }
Exemple #10
0
 /// <summary>
 ///   Applies the transformation to the given input strings, returning the
 ///   dense result of Transform(input) converted to its sparse representation.
 /// </summary>
 ///
 /// <param name="input">The input strings to be transformed.</param>
 ///
 /// <returns>The transformed vector in sparse form.</returns>
 ///
 Sparse <double> ITransform <string[], Sparse <double> > .Transform(string[] input)
 {
     return(Sparse.FromDense(Transform(input)));
 }
Exemple #11
0
        public void logistic_regression_sparse_test()
        {
            #region doc_logreg_sparse
            // Declare some training data. This is exactly the same
            // data used in the LogisticRegression documentation page

            // Suppose we have the following data about some patients.
            // The first variable is continuous and represent patient
            // age. The second variable is dichotomic and give whether
            // they smoke or not (This is completely fictional data).

            // We also know if they have had lung cancer or not, and
            // we would like to know whether smoking has any connection
            // with lung cancer (This is completely fictional data).

            Sparse<double>[] inputs =
            {                                             // age, smokes?, had cancer?
                Sparse.FromDense(new double[] { 55, 0 }), // false - no cancer
                Sparse.FromDense(new double[] { 28, 0 }), // false
                Sparse.FromDense(new double[] { 65, 1 }), // false
                Sparse.FromDense(new double[] { 46, 0 }), // true  - had cancer
                Sparse.FromDense(new double[] { 86, 1 }), // true
                Sparse.FromDense(new double[] { 56, 1 }), // true
                Sparse.FromDense(new double[] { 85, 0 }), // false
                Sparse.FromDense(new double[] { 33, 0 }), // false
                Sparse.FromDense(new double[] { 21, 1 }), // false
                Sparse.FromDense(new double[] { 42, 1 }), // true
            };

            // Whether each patient had lung cancer or not
            double[] outputs = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

            // Create the L1-regularization learning algorithm
            var learner = new ProbabilisticCoordinateDescent<Linear, Sparse<double>>()
            {
                Tolerance = 1e-10,
                Complexity = 1e+10, // learn a hard-margin model
            };

            // Learn the L1-regularized machine
            var machine = learner.Learn(inputs, outputs);

            // Convert the svm to logistic regression
            var regression = (LogisticRegression)machine;

            // Compute the predicted outcome for the inputs
            bool[] decisions = regression.Decide(inputs.ToDense(regression.NumberOfInputs));

            // Compute log-likelihood scores for the outputs
            double[] scores = regression.LogLikelihood(inputs.ToDense(regression.NumberOfInputs));

            // Compute odds-ratios as in the LogisticRegression example
            double ageOdds = regression.GetOddsRatio(1);   // 1.0208597029158772
            double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

            // Compute the classification error as in the SVM example
            double error = new ZeroOneLoss(outputs).Loss(decisions);
            #endregion

            // Converting back to a machine preserves the problem dimensions
            var rsvm = (SupportVectorMachine)regression;
            Assert.AreEqual(2, rsvm.NumberOfInputs);
            Assert.AreEqual(2, rsvm.NumberOfOutputs); // TODO: Maybe should 1 rather than 2

            // The machine's scores and the regression's log-likelihoods agree
            double[] svmpred = machine.Score(inputs);
            Assert.IsTrue(scores.IsEqual(svmpred, 1e-10));

            Assert.AreEqual(0.2, error);
            Assert.AreEqual(1.0208597029158772, ageOdds, 1e-4);
            Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-4);

            Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
            Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
            Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-8);
            Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);
        }
        public void logistic_regression_sparse_test()
        {
            Accord.Math.Random.Generator.Seed = 0;

            #region doc_logreg_sparse
            // Declare some training data. This is exactly the same
            // data used in the LogisticRegression documentation page

            // Suppose we have the following data about some patients.
            // The first variable is continuous and represent patient
            // age. The second variable is dichotomic and give whether
            // they smoke or not (This is completely fictional data).

            // We also know if they have had lung cancer or not, and
            // we would like to know whether smoking has any connection
            // with lung cancer (This is completely fictional data).

            Sparse<double>[] inputs =
            {                                             // age, smokes?, had cancer?
                Sparse.FromDense(new double[] { 55, 0 }), // false - no cancer
                Sparse.FromDense(new double[] { 28, 0 }), // false
                Sparse.FromDense(new double[] { 65, 1 }), // false
                Sparse.FromDense(new double[] { 46, 0 }), // true  - had cancer
                Sparse.FromDense(new double[] { 86, 1 }), // true
                Sparse.FromDense(new double[] { 56, 1 }), // true
                Sparse.FromDense(new double[] { 85, 0 }), // false
                Sparse.FromDense(new double[] { 33, 0 }), // false
                Sparse.FromDense(new double[] { 21, 1 }), // false
                Sparse.FromDense(new double[] { 42, 1 }), // true
            };

            // Whether each patient had lung cancer or not
            double[] outputs = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

            // Create the probabilistic-SVM learning algorithm
            var learner = new ProbabilisticDualCoordinateDescent<Linear, Sparse<double>>()
            {
                Tolerance = 1e-10,
                Complexity = 1e+10, // learn a hard-margin model
            };

            // Learn the support vector machine
            var machine = learner.Learn(inputs, outputs);

            // Convert the svm to logistic regression
            var regression = (LogisticRegression)machine;

            // Compute the predicted outcome for the inputs
            bool[] decisions = regression.Decide(inputs.ToDense(regression.NumberOfInputs));

            // Compute probability scores for the outputs
            double[] scores = regression.Score(inputs.ToDense(regression.NumberOfInputs));

            // Compute odds-ratios as in the LogisticRegression example
            double ageOdds = regression.GetOddsRatio(1);   // 1.0430443799578411
            double smokeOdds = regression.GetOddsRatio(2); // 7.2414593749145508

            // Compute the classification error as in the SVM example
            double error = new ZeroOneLoss(outputs).Loss(decisions);
            #endregion

            // Converting back to a machine preserves the problem dimensions
            var rsvm = (SupportVectorMachine)regression;
            Assert.AreEqual(2, rsvm.NumberOfInputs);
            Assert.AreEqual(2, rsvm.NumberOfOutputs);

            // The machine's probabilities and the regression's scores agree
            double[] svmpred = machine.Probability(inputs);
            Assert.IsTrue(scores.IsEqual(svmpred, 1e-10));

            Assert.AreEqual(0.4, error);
            Assert.AreEqual(1.0430443799578411, ageOdds, 1e-4);
            Assert.AreEqual(7.2414593749145508, smokeOdds, 1e-4);

            Assert.AreEqual(-21.4120677536517, regression.Intercept, 1e-8);
            Assert.AreEqual(-21.4120677536517, regression.Coefficients[0], 1e-8);
            Assert.AreEqual(0.042143725408546939, regression.Coefficients[1], 1e-8);
            Assert.AreEqual(1.9798227572056906, regression.Coefficients[2], 1e-8);
        }
 /// <summary>
 ///   Writes the given feature vector and associated output label/value to the file.
 /// </summary>
 ///
 /// <param name="feature">The feature vector to be written.</param>
 /// <param name="output">The output value to be written.</param>
 /// <param name="comment">An optional comment describing the feature.</param>
 ///
 public void Write(double[] feature, double output, string comment)
 {
     // Convert the dense vector and delegate to the sparse overload.
     var sparseFeature = Sparse.FromDense(feature);
     Write(sparseFeature, output, comment);
 }
 /// <summary>
 ///   Writes the given feature vector and associated output label/value to the file.
 /// </summary>
 ///
 /// <param name="feature">The feature vector to be written.</param>
 /// <param name="output">The output value to be written.</param>
 ///
 public void Write(double[] feature, bool output)
 {
     // Convert the dense vector and delegate to the sparse overload.
     var sparseFeature = Sparse.FromDense(feature);
     Write(sparseFeature, output);
 }