public void learn_test_2() { #region doc_learn_2 // Let's say we would like predict a continuous number from a set // of discrete and continuous input variables. For this, we will // be using the Servo dataset from UCI's Machine Learning repository // as an example: http://archive.ics.uci.edu/ml/datasets/Servo // Create a Servo dataset Servo servo = new Servo(); object[][] instances = servo.Instances; // 167 x 4 double[] outputs = servo.Output; // 167 x 1 // This dataset contains 4 columns, where the first two are // symbolic (having possible values A, B, C, D, E), and the // last two are continuous. // We will use a codification filter to transform the symbolic // variables into one-hot vectors, while keeping the other two // continuous variables intact: var codebook = new Codification <object>() { { "motor", CodificationVariable.Categorical }, { "screw", CodificationVariable.Categorical }, { "pgain", CodificationVariable.Continuous }, { "vgain", CodificationVariable.Continuous }, }; // Learn the codebook codebook.Learn(instances); // We can gather some info about the problem: int numberOfInputs = codebook.NumberOfInputs; // should be 4 (since there are 4 variables) int numberOfOutputs = codebook.NumberOfOutputs; // should be 12 (due their one-hot encodings) // Now we can use it to obtain double[] vectors: double[][] inputs = codebook.ToDouble().Transform(instances); // We will use Ordinary Least Squares to create a // linear regression model with an intercept term var ols = new OrdinaryLeastSquares() { UseIntercept = true }; // Use Ordinary Least Squares to estimate a regression model: MultipleLinearRegression regression = ols.Learn(inputs, outputs); // We can compute the predicted points using: double[] predicted = regression.Transform(inputs); // And the squared error using the SquareLoss class: double error = new SquareLoss(outputs).Loss(predicted); // We can also compute other measures, such as the coefficient of determination r² using: double r2 = new RSquaredLoss(numberOfOutputs, outputs).Loss(predicted); // should be 0.55086630162967354 // Or the adjusted or weighted versions of r² using: var r2loss = new RSquaredLoss(numberOfOutputs, outputs) { Adjust = true, // Weights = weights; // (uncomment if you have a weighted problem) }; double ar2 = r2loss.Loss(predicted); // should be 0.51586887058782993 // Alternatively, we can also use the less generic, but maybe more user-friendly method directly: double ur2 = regression.CoefficientOfDetermination(inputs, outputs, adjust: true); // should be 0.51586887058782993 #endregion Assert.AreEqual(4, numberOfInputs); Assert.AreEqual(12, numberOfOutputs); Assert.AreEqual(12, regression.NumberOfInputs); Assert.AreEqual(1, regression.NumberOfOutputs); Assert.AreEqual(1.0859586717266123, error, 1e-6); double[] expected = regression.Compute(inputs); double[] actual = regression.Transform(inputs); Assert.IsTrue(expected.IsEqual(actual, 1e-10)); Assert.AreEqual(0.55086630162967354, r2); Assert.AreEqual(0.51586887058782993, ar2); Assert.AreEqual(0.51586887058782993, ur2); }