Exemplo n.º 1
0
        public void learn_test_2()
        {
            #region doc_learn_2
            // Let's say we would like predict a continuous number from a set
            // of discrete and continuous input variables. For this, we will
            // be using the Servo dataset from UCI's Machine Learning repository
            // as an example: http://archive.ics.uci.edu/ml/datasets/Servo

            // Create a Servo dataset
            Servo      servo     = new Servo();
            object[][] instances = servo.Instances; // 167 x 4
            double[]   outputs   = servo.Output;    // 167 x 1

            // This dataset contains 4 columns, where the first two are
            // symbolic (having possible values A, B, C, D, E), and the
            // last two are continuous.

            // We will use a codification filter to transform the symbolic
            // variables into one-hot vectors, while keeping the other two
            // continuous variables intact:
            var codebook = new Codification <object>()
            {
                { "motor", CodificationVariable.Categorical },
                { "screw", CodificationVariable.Categorical },
                { "pgain", CodificationVariable.Continuous },
                { "vgain", CodificationVariable.Continuous },
            };

            // Learn the codebook
            codebook.Learn(instances);

            // We can gather some info about the problem:
            int numberOfInputs  = codebook.NumberOfInputs;  // should be 4 (since there are 4 variables)
            int numberOfOutputs = codebook.NumberOfOutputs; // should be 12 (due their one-hot encodings)

            // Now we can use it to obtain double[] vectors:
            double[][] inputs = codebook.ToDouble().Transform(instances);

            // We will use Ordinary Least Squares to create a
            // linear regression model with an intercept term
            var ols = new OrdinaryLeastSquares()
            {
                UseIntercept = true
            };

            // Use Ordinary Least Squares to estimate a regression model:
            MultipleLinearRegression regression = ols.Learn(inputs, outputs);

            // We can compute the predicted points using:
            double[] predicted = regression.Transform(inputs);

            // And the squared error using the SquareLoss class:
            double error = new SquareLoss(outputs).Loss(predicted);

            // We can also compute other measures, such as the coefficient of determination r² using:
            double r2 = new RSquaredLoss(numberOfOutputs, outputs).Loss(predicted); // should be 0.55086630162967354

            // Or the adjusted or weighted versions of r² using:
            var r2loss = new RSquaredLoss(numberOfOutputs, outputs)
            {
                Adjust = true,
                // Weights = weights; // (uncomment if you have a weighted problem)
            };

            double ar2 = r2loss.Loss(predicted); // should be 0.51586887058782993

            // Alternatively, we can also use the less generic, but maybe more user-friendly method directly:
            double ur2 = regression.CoefficientOfDetermination(inputs, outputs, adjust: true); // should be 0.51586887058782993
            #endregion

            Assert.AreEqual(4, numberOfInputs);
            Assert.AreEqual(12, numberOfOutputs);
            Assert.AreEqual(12, regression.NumberOfInputs);
            Assert.AreEqual(1, regression.NumberOfOutputs);

            Assert.AreEqual(1.0859586717266123, error, 1e-6);

            double[] expected = regression.Compute(inputs);
            double[] actual   = regression.Transform(inputs);
            Assert.IsTrue(expected.IsEqual(actual, 1e-10));

            Assert.AreEqual(0.55086630162967354, r2);
            Assert.AreEqual(0.51586887058782993, ar2);
            Assert.AreEqual(0.51586887058782993, ur2);
        }