public void LargeCoefficientsTest()
{
    // Each row contains three input variables followed by the
    // binary class label in the last column.
    double[,] data =
    {
        { 48, 1, 4.40, 0 }, { 60, 0, 7.89, 1 }, { 51, 0, 3.48, 0 }, { 66, 0, 8.41, 1 },
        { 40, 1, 3.05, 0 }, { 44, 1, 4.56, 0 }, { 80, 0, 6.91, 1 }, { 52, 0, 5.69, 0 },
        { 58, 0, 4.01, 0 }, { 58, 0, 4.48, 0 }, { 72, 1, 5.97, 0 }, { 57, 0, 6.71, 1 },
        { 55, 1, 5.36, 0 }, { 71, 0, 5.68, 0 }, { 44, 1, 4.61, 0 }, { 65, 1, 4.80, 0 },
        { 38, 0, 5.06, 0 }, { 50, 0, 6.40, 0 }, { 80, 0, 6.67, 1 }, { 69, 1, 5.79, 0 },
        { 39, 0, 5.42, 0 }, { 68, 0, 7.61, 1 }, { 47, 1, 3.24, 0 }, { 45, 1, 4.29, 0 },
        { 79, 1, 7.44, 1 }, { 41, 1, 4.60, 0 }, { 45, 0, 5.91, 0 }, { 54, 0, 4.77, 0 },
        { 43, 1, 5.62, 0 }, { 62, 1, 7.92, 1 }, { 72, 1, 7.92, 1 }, { 57, 1, 6.19, 0 },
        { 39, 1, 2.37, 0 }, { 51, 0, 5.84, 0 }, { 73, 1, 5.94, 0 }, { 41, 1, 3.82, 0 },
        { 35, 0, 2.35, 0 }, { 69, 0, 6.57, 1 }, { 75, 1, 7.96, 1 }, { 51, 1, 3.96, 0 },
        { 61, 1, 4.36, 0 }, { 55, 0, 3.84, 0 }, { 45, 1, 3.02, 0 }, { 48, 0, 4.65, 0 },
        { 77, 0, 7.93, 1 }, { 40, 1, 2.46, 0 }, { 37, 1, 2.32, 0 }, { 78, 0, 7.88, 1 },
        { 39, 1, 4.55, 0 }, { 41, 0, 2.45, 0 }, { 54, 1, 5.62, 0 }, { 59, 1, 5.03, 0 },
        { 78, 0, 8.08, 1 }, { 56, 1, 6.96, 1 }, { 49, 1, 3.07, 0 }, { 48, 0, 4.75, 0 },
        { 63, 1, 5.64, 0 }, { 50, 0, 3.35, 0 }, { 59, 1, 5.08, 0 }, { 60, 0, 6.58, 1 },
        { 64, 0, 5.19, 0 }, { 76, 1, 6.69, 1 }, { 58, 0, 5.18, 0 }, { 48, 1, 4.47, 0 },
        { 72, 0, 8.70, 1 }, { 40, 1, 5.14, 0 }, { 53, 0, 3.40, 0 }, { 79, 0, 9.77, 1 },
        { 61, 1, 7.79, 1 }, { 59, 0, 7.42, 1 }, { 44, 0, 2.55, 0 }, { 52, 1, 3.71, 0 },
        { 80, 1, 7.56, 1 }, { 76, 0, 7.80, 1 }, { 51, 0, 5.94, 0 }, { 46, 1, 5.52, 0 },
        { 48, 0, 3.25, 0 }, { 58, 1, 4.71, 0 }, { 44, 1, 2.52, 0 }, { 68, 0, 8.38, 1 },
    };

    double[][] input = data.Submatrix(null, 0, 2).ToArray();
    double[] output = data.GetColumn(3);

    LogisticRegression regression = new LogisticRegression(3);

    var teacher = new IterativeReweightedLeastSquares(regression);
    teacher.Regularization = 1e-10;

    var errors = new List<double>();
    for (int i = 0; i < 1000; i++)
        errors.Add(teacher.Run(input, output));

    // Count how many training samples are misclassified when the
    // predicted probability is rounded to the nearest class.
    double error = 0;
    for (int i = 0; i < output.Length; i++)
    {
        double expected = output[i];
        double actual = System.Math.Round(regression.Compute(input[i]));

        if (expected != actual)
            error++;
    }

    error /= output.Length;

    Assert.AreEqual(0, error);
    Assert.AreEqual(-355.59378247276379, regression.Coefficients[0]);
    Assert.AreEqual(1.2646432605797491, regression.Coefficients[1]);
    Assert.AreEqual(-10.710529810144157, regression.Coefficients[2]);
    Assert.AreEqual(44.089493151268726, regression.Coefficients[3]);
}
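// Hedged sketch (not part of the original test suite): the training-error loop in
// LargeCoefficientsTest above could be factored into a small helper like the one
// below. The helper name is hypothetical; it only restates the rounding-based
// error count already used in the test.
private static double ClassificationError(LogisticRegression regression,
    double[][] input, double[] output)
{
    int misclassified = 0;

    for (int i = 0; i < output.Length; i++)
    {
        // Compute returns the predicted probability; rounding thresholds it at 0.5.
        double predicted = System.Math.Round(regression.Compute(input[i]));

        if (predicted != output[i])
            misclassified++;
    }

    // Fraction of misclassified training samples.
    return misclassified / (double)output.Length;
}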
public void ComputeTest()
{
    // Suppose we have the following data about some patients.
    // The first variable is continuous and represents the patient's
    // age. The second variable is dichotomous and indicates whether
    // the patient smokes or not (this is completely fictional data).
    double[][] input =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - has cancer
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    // We also know whether each patient has had lung cancer or not, and
    // we would like to know whether smoking has any connection with
    // lung cancer (again, completely fictional data).
    double[] output = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

    // To verify this hypothesis, we are going to create a logistic
    // regression model for those two inputs (age and smoking).
    LogisticRegression regression = new LogisticRegression(inputs: 2);

    // Next, we are going to estimate this model. For this, we will
    // use the Iteratively Reweighted Least Squares method.
    var teacher = new IterativeReweightedLeastSquares(regression);
    teacher.Regularization = 0;

    // Now, we will iteratively estimate our model. The Run method returns
    // the maximum relative change in the model parameters, and we will use
    // it as the convergence criterion.
    double delta = 0;
    do
    {
        // Perform an iteration
        delta = teacher.Run(input, output);
    } while (delta > 0.001);

    // At this point, we can compute the odds ratios of our variables.
    // In the model, the variable at index 0 is always the intercept
    // term, with the others following in sequence. Index 1 is the age
    // and index 2 is whether the patient smokes or not.

    // For the age variable, each additional year of age multiplies the
    // odds of getting lung cancer by about 1.021, controlling for
    // cigarette smoking.
    double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

    // For the smoking/non-smoking categorical variable, however, we have
    // that individuals who smoke have about 5.86 times the odds of
    // developing lung cancer compared to those who do not smoke,
    // controlling for age (remember, this is completely fictional and
    // for demonstration purposes only).
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    double[] actual = new double[output.Length];
    for (int i = 0; i < input.Length; i++)
        actual[i] = regression.Compute(input[i]);

    double[] expected =
    {
        0.21044171560168326, 0.13242527535212373, 0.65747803433771812,
        0.18122484822324372, 0.74755661773156912, 0.61450041841477232,
        0.33116705418194975, 0.14474110902457912, 0.43627109657399382,
        0.54419383282533118
    };

    for (int i = 0; i < actual.Length; i++)
        Assert.AreEqual(expected[i], actual[i]);

    Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-8);
    Assert.IsFalse(double.IsNaN(ageOdds));
    Assert.IsFalse(double.IsNaN(smokeOdds));

    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);
}
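// Hedged sketch (not part of the original test suite): the odds ratios asserted in
// ComputeTest above follow the usual logistic-regression identity OR_i = exp(b_i);
// for instance exp(0.020645118265359252) ≈ 1.0208597028836701 for age and
// exp(1.7678893101571855) ≈ 5.8584748789881331 for smoking. The tiny data set below
// is made up only so there is a fitted model to check the identity against, and the
// iteration cap is an arbitrary choice.
public void OddsRatioMatchesExponentiatedCoefficientSketch()
{
    double[][] input =
    {
        new double[] { 1 }, new double[] { 2 }, new double[] { 3 }, new double[] { 4 },
        new double[] { 5 }, new double[] { 6 }, new double[] { 7 }, new double[] { 8 },
    };
    double[] output = { 0, 0, 1, 0, 1, 0, 1, 1 }; // deliberately not separable

    var regression = new LogisticRegression(inputs: 1);
    var teacher = new IterativeReweightedLeastSquares(regression);

    for (int i = 0; i < 100; i++) // fixed iteration cap instead of a convergence test
        teacher.Run(input, output);

    // The odds ratio of each variable should be the exponential of its
    // coefficient (index 0 being the intercept).
    for (int i = 0; i < regression.Coefficients.Length; i++)
        Assert.AreEqual(System.Math.Exp(regression.Coefficients[i]),
            regression.GetOddsRatio(i), 1e-10);
}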
public void LargeCoefficientsTest()
{
    // Each row contains three input variables followed by the
    // binary class label in the last column.
    double[,] data =
    {
        { 48, 1, 4.40, 0 }, { 60, 0, 7.89, 1 }, { 51, 0, 3.48, 0 }, { 66, 0, 8.41, 1 },
        { 40, 1, 3.05, 0 }, { 44, 1, 4.56, 0 }, { 80, 0, 6.91, 1 }, { 52, 0, 5.69, 0 },
        { 58, 0, 4.01, 0 }, { 58, 0, 4.48, 0 }, { 72, 1, 5.97, 0 }, { 57, 0, 6.71, 1 },
        { 55, 1, 5.36, 0 }, { 71, 0, 5.68, 0 }, { 44, 1, 4.61, 0 }, { 65, 1, 4.80, 0 },
        { 38, 0, 5.06, 0 }, { 50, 0, 6.40, 0 }, { 80, 0, 6.67, 1 }, { 69, 1, 5.79, 0 },
        { 39, 0, 5.42, 0 }, { 68, 0, 7.61, 1 }, { 47, 1, 3.24, 0 }, { 45, 1, 4.29, 0 },
        { 79, 1, 7.44, 1 }, { 41, 1, 4.60, 0 }, { 45, 0, 5.91, 0 }, { 54, 0, 4.77, 0 },
        { 43, 1, 5.62, 0 }, { 62, 1, 7.92, 1 }, { 72, 1, 7.92, 1 }, { 57, 1, 6.19, 0 },
        { 39, 1, 2.37, 0 }, { 51, 0, 5.84, 0 }, { 73, 1, 5.94, 0 }, { 41, 1, 3.82, 0 },
        { 35, 0, 2.35, 0 }, { 69, 0, 6.57, 1 }, { 75, 1, 7.96, 1 }, { 51, 1, 3.96, 0 },
        { 61, 1, 4.36, 0 }, { 55, 0, 3.84, 0 }, { 45, 1, 3.02, 0 }, { 48, 0, 4.65, 0 },
        { 77, 0, 7.93, 1 }, { 40, 1, 2.46, 0 }, { 37, 1, 2.32, 0 }, { 78, 0, 7.88, 1 },
        { 39, 1, 4.55, 0 }, { 41, 0, 2.45, 0 }, { 54, 1, 5.62, 0 }, { 59, 1, 5.03, 0 },
        { 78, 0, 8.08, 1 }, { 56, 1, 6.96, 1 }, { 49, 1, 3.07, 0 }, { 48, 0, 4.75, 0 },
        { 63, 1, 5.64, 0 }, { 50, 0, 3.35, 0 }, { 59, 1, 5.08, 0 }, { 60, 0, 6.58, 1 },
        { 64, 0, 5.19, 0 }, { 76, 1, 6.69, 1 }, { 58, 0, 5.18, 0 }, { 48, 1, 4.47, 0 },
        { 72, 0, 8.70, 1 }, { 40, 1, 5.14, 0 }, { 53, 0, 3.40, 0 }, { 79, 0, 9.77, 1 },
        { 61, 1, 7.79, 1 }, { 59, 0, 7.42, 1 }, { 44, 0, 2.55, 0 }, { 52, 1, 3.71, 0 },
        { 80, 1, 7.56, 1 }, { 76, 0, 7.80, 1 }, { 51, 0, 5.94, 0 }, { 46, 1, 5.52, 0 },
        { 48, 0, 3.25, 0 }, { 58, 1, 4.71, 0 }, { 44, 1, 2.52, 0 }, { 68, 0, 8.38, 1 },
    };

    double[][] input = data.Submatrix(null, 0, 2).ToJagged();
    double[] output = data.GetColumn(3);

    var regression = new LogisticRegression(3);

    var teacher = new IterativeReweightedLeastSquares(regression);
    teacher.Regularization = 1e-10;

    var errors = new List<double>();
    for (int i = 0; i < 1000; i++)
        errors.Add(teacher.Run(input, output));

    // Count how many training samples are misclassified when the
    // predicted probability is rounded to the nearest class.
    double error = 0;
    for (int i = 0; i < output.Length; i++)
    {
        double expected = output[i];
        double actual = System.Math.Round(regression.Compute(input[i]));

        if (expected != actual)
            error++;
    }

    error /= output.Length;

    Assert.AreEqual(0, error);
    Assert.AreEqual(-490.30977151704076, regression.Coefficients[0], 1e-7);
    Assert.AreEqual(1.7763049293456503, regression.Coefficients[1], 1e-7);
    Assert.AreEqual(-14.882619671822592, regression.Coefficients[2], 1e-7);
    Assert.AreEqual(60.5066623676452, regression.Coefficients[3], 1e-7);
}
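// Hedged sketch (not part of the original test suite): the rounding used in the error
// loops of the two LargeCoefficientsTest variants works because Compute is expected to
// return the logistic probability
//     p(y = 1 | x) = 1 / (1 + exp(-(b0 + b1*x1 + b2*x2 + b3*x3))),
// so System.Math.Round simply thresholds that probability at 0.5. The helper below
// spells the formula out; its name is hypothetical and it is not part of the library.
private static double LogisticProbability(double[] coefficients, double[] x)
{
    // coefficients[0] is the intercept; the remaining entries pair with the
    // input variables in order.
    double linear = coefficients[0];
    for (int i = 0; i < x.Length; i++)
        linear += coefficients[i + 1] * x[i];

    // Logistic (sigmoid) link function.
    return 1.0 / (1.0 + System.Math.Exp(-linear));
}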