/// <summary>
///   Verifies that IRLS with a tiny ridge regularization (1e-10) converges on
///   nearly-separable data that drives the coefficients to very large magnitudes,
///   and that the fitted model classifies the training set perfectly.
/// </summary>
public void LargeCoefficientsTest()
{
    // Columns: x1, x2, x3, and the binary class label in column 3.
    double[,] data =
    {
        { 48, 1, 4.40, 0 }, { 60, 0, 7.89, 1 }, { 51, 0, 3.48, 0 }, { 66, 0, 8.41, 1 },
        { 40, 1, 3.05, 0 }, { 44, 1, 4.56, 0 }, { 80, 0, 6.91, 1 }, { 52, 0, 5.69, 0 },
        { 58, 0, 4.01, 0 }, { 58, 0, 4.48, 0 }, { 72, 1, 5.97, 0 }, { 57, 0, 6.71, 1 },
        { 55, 1, 5.36, 0 }, { 71, 0, 5.68, 0 }, { 44, 1, 4.61, 0 }, { 65, 1, 4.80, 0 },
        { 38, 0, 5.06, 0 }, { 50, 0, 6.40, 0 }, { 80, 0, 6.67, 1 }, { 69, 1, 5.79, 0 },
        { 39, 0, 5.42, 0 }, { 68, 0, 7.61, 1 }, { 47, 1, 3.24, 0 }, { 45, 1, 4.29, 0 },
        { 79, 1, 7.44, 1 }, { 41, 1, 4.60, 0 }, { 45, 0, 5.91, 0 }, { 54, 0, 4.77, 0 },
        { 43, 1, 5.62, 0 }, { 62, 1, 7.92, 1 }, { 72, 1, 7.92, 1 }, { 57, 1, 6.19, 0 },
        { 39, 1, 2.37, 0 }, { 51, 0, 5.84, 0 }, { 73, 1, 5.94, 0 }, { 41, 1, 3.82, 0 },
        { 35, 0, 2.35, 0 }, { 69, 0, 6.57, 1 }, { 75, 1, 7.96, 1 }, { 51, 1, 3.96, 0 },
        { 61, 1, 4.36, 0 }, { 55, 0, 3.84, 0 }, { 45, 1, 3.02, 0 }, { 48, 0, 4.65, 0 },
        { 77, 0, 7.93, 1 }, { 40, 1, 2.46, 0 }, { 37, 1, 2.32, 0 }, { 78, 0, 7.88, 1 },
        { 39, 1, 4.55, 0 }, { 41, 0, 2.45, 0 }, { 54, 1, 5.62, 0 }, { 59, 1, 5.03, 0 },
        { 78, 0, 8.08, 1 }, { 56, 1, 6.96, 1 }, { 49, 1, 3.07, 0 }, { 48, 0, 4.75, 0 },
        { 63, 1, 5.64, 0 }, { 50, 0, 3.35, 0 }, { 59, 1, 5.08, 0 }, { 60, 0, 6.58, 1 },
        { 64, 0, 5.19, 0 }, { 76, 1, 6.69, 1 }, { 58, 0, 5.18, 0 }, { 48, 1, 4.47, 0 },
        { 72, 0, 8.70, 1 }, { 40, 1, 5.14, 0 }, { 53, 0, 3.40, 0 }, { 79, 0, 9.77, 1 },
        { 61, 1, 7.79, 1 }, { 59, 0, 7.42, 1 }, { 44, 0, 2.55, 0 }, { 52, 1, 3.71, 0 },
        { 80, 1, 7.56, 1 }, { 76, 0, 7.80, 1 }, { 51, 0, 5.94, 0 }, { 46, 1, 5.52, 0 },
        { 48, 0, 3.25, 0 }, { 58, 1, 4.71, 0 }, { 44, 1, 2.52, 0 }, { 68, 0, 8.38, 1 },
    };

    // Split into the three input columns and the label column.
    double[][] input = data.Submatrix(null, 0, 2).ToJagged();
    double[] output = data.GetColumn(3);

    var regression = new LogisticRegression(3);

    var teacher = new IterativeReweightedLeastSquares(regression);
    teacher.Regularization = 1e-10; // tiny ridge keeps the Hessian invertible on near-separable data

    // Run a fixed, large number of IRLS iterations; the per-iteration parameter
    // changes are collected but the test only asserts on the final model.
    var errors = new List<double>();
    for (int i = 0; i < 1000; i++)
        errors.Add(teacher.Run(input, output));

    // Compute the training classification error rate.
    double error = 0;
    for (int i = 0; i < output.Length; i++)
    {
        double expected = output[i];
        double actual = System.Math.Round(regression.Compute(input[i]));

        if (expected != actual)
            error++;
    }

    error /= output.Length;

    // The model should separate the training data perfectly.
    // (Fixed: expected value goes first in Assert.AreEqual.)
    Assert.AreEqual(0, error);

    // Pin the (very large) converged coefficients.
    Assert.AreEqual(-490.30977151704076, regression.Coefficients[0], 1e-7);
    Assert.AreEqual(1.7763049293456503, regression.Coefficients[1], 1e-7);
    Assert.AreEqual(-14.882619671822592, regression.Coefficients[2], 1e-7);
    Assert.AreEqual(60.5066623676452, regression.Coefficients[3], 1e-7);
}
/// <summary>
///   Checks two properties: (1) fitting a logistic regression on grouped data
///   expressed as per-group success probabilities yields the same coefficients
///   as fitting on the equivalent fully-expanded per-observation data, and
///   (2) LogisticRegressionAnalysis reproduces known deviance, chi-square and
///   coefficient statistics on a small reference dataset.
/// </summary>
public void RegressTest()
{
    // Grouped representation: product id, count of class 1, count of class 0.
    double[,] inputGrouped =
    {
        { 1, 4, 5 }, // product 1 has four occurrences of class 1 and five of class 0
        { 2, 1, 3 }, // product 2 has one occurrence of class 1 and three of class 0
    };

    // Same information expressed as a success probability per group.
    double[,] inputGroupProb =
    {
        { 1, 4.0 / (4 + 5) }, // product 1 has 0.44 probability of belonging to class 1
        { 2, 1.0 / (1 + 3) }, // product 2 has 0.25 probability of belonging to class 1
    };

    // Same information fully expanded, one row per observation.
    double[,] inputExtended =
    {
        { 1, 1 }, // observation of product 1 in class 1
        { 1, 1 }, // observation of product 1 in class 1
        { 1, 1 }, // observation of product 1 in class 1
        { 1, 1 }, // observation of product 1 in class 1
        { 1, 0 }, // observation of product 1 in class 0
        { 1, 0 }, // observation of product 1 in class 0
        { 1, 0 }, // observation of product 1 in class 0
        { 1, 0 }, // observation of product 1 in class 0
        { 1, 0 }, // observation of product 1 in class 0
        { 2, 1 }, // observation of product 2 in class 1
        { 2, 0 }, // observation of product 2 in class 0
        { 2, 0 }, // observation of product 2 in class 0
        { 2, 0 }, // observation of product 2 in class 0
    };

    // Fit using extended (per-observation) data
    double[][] inputs = Matrix.ColumnVector(inputExtended.GetColumn(0)).ToArray();
    double[] outputs = inputExtended.GetColumn(1);
    LogisticRegression target = new LogisticRegression(1);
    IterativeReweightedLeastSquares irls = new IterativeReweightedLeastSquares(target);
    irls.Run(inputs, outputs);

    // Fit using grouped (probability) data
    double[][] inputs2 = Matrix.ColumnVector(inputGroupProb.GetColumn(0)).ToArray();
    double[] outputs2 = inputGroupProb.GetColumn(1);
    LogisticRegression target2 = new LogisticRegression(1);
    IterativeReweightedLeastSquares irls2 = new IterativeReweightedLeastSquares(target2);
    irls2.Run(inputs2, outputs2);

    // Both fits must converge to the same coefficients (up to 1e-6).
    Assert.IsTrue(Matrix.IsEqual(target.Coefficients, target2.Coefficients, 0.000001));

    // Second part: reference statistics for LogisticRegressionAnalysis.
    double[,] data = new double[,]
    {
        { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }, { 5, 1 },
        { 6, 0 }, { 7, 1 }, { 8, 0 }, { 9, 1 }, { 10, 1 }
    };

    double[][] inputs3 = Matrix.ColumnVector(data.GetColumn(0)).ToArray();
    double[] outputs3 = data.GetColumn(1);

    LogisticRegressionAnalysis analysis = new LogisticRegressionAnalysis(inputs3, outputs3);
    analysis.Compute();

    Assert.IsFalse(double.IsNaN(analysis.Deviance));
    Assert.IsFalse(double.IsNaN(analysis.ChiSquare.PValue));

    // NOTE(review): these Assert.AreEqual calls pass (actual, expected) in
    // reversed order; they still verify equality but failure messages will
    // be misleading.
    Assert.AreEqual(analysis.Deviance, 8.6202, 0.0005);
    Assert.AreEqual(analysis.ChiSquare.PValue, 0.0278, 0.0005);

    // Check intercept
    Assert.IsFalse(double.IsNaN(analysis.Coefficients[0].Value));
    Assert.AreEqual(analysis.Coefficients[0].Value, -4.3578, 0.0005);

    // Check coefficients
    Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].Value));
    Assert.AreEqual(analysis.Coefficients[1].Value, 0.6622, 0.0005);

    // Check statistics
    Assert.AreEqual(analysis.Coefficients[1].StandardError, 0.4001, 0.0005);
    Assert.AreEqual(analysis.Coefficients[1].Wald.PValue, 0.0979, 0.0005);
    Assert.AreEqual(analysis.Coefficients[1].OddsRatio, 1.9391, 0.0005);
    Assert.AreEqual(analysis.Coefficients[1].ConfidenceLower, 0.8852, 0.0005);
    Assert.AreEqual(analysis.Coefficients[1].ConfidenceUpper, 4.2478, 0.0005);

    Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].Wald.PValue));
    Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].StandardError));
    Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].OddsRatio));
    Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].ConfidenceLower));
    Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].ConfidenceUpper));
}
/// <summary>
///   Documentation example (doc_log_reg_1): fits a logistic regression with the
///   generic IterativeReweightedLeastSquares&lt;LogisticRegression&gt; learner and
///   pins odds ratios, coefficients, confidence intervals, probabilities and
///   decisions against known reference values.
/// </summary>
public void learn_new_mechanism()
{
    // Fix the RNG seed so the example is fully deterministic.
    Accord.Math.Random.Generator.Seed = 0;

    #region doc_log_reg_1
    // Suppose we have the following data about some patients.
    // The first variable is continuous and represent patient
    // age. The second variable is dichotomic and give whether
    // they smoke or not (This is completely fictional data).

    // We also know if they have had lung cancer or not, and
    // we would like to know whether smoking has any connection
    // with lung cancer (This is completely fictional data).
    double[][] input =
    {              // age, smokes?, had cancer?
        new double[] { 55, 0 },  // false - no cancer
        new double[] { 28, 0 },  // false
        new double[] { 65, 1 },  // false
        new double[] { 46, 0 },  // true  - had cancer
        new double[] { 86, 1 },  // true
        new double[] { 56, 1 },  // true
        new double[] { 85, 0 },  // false
        new double[] { 33, 0 },  // false
        new double[] { 21, 1 },  // false
        new double[] { 42, 1 },  // true
    };

    bool[] output = // Whether each patient had lung cancer or not
    {
        false, false, false, true, true, true, false, false, false, true
    };


    // To verify this hypothesis, we are going to create a logistic
    // regression model for those two inputs (age and smoking), learned
    // using a method called "Iteratively Reweighted Least Squares":

    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,  // Let's set some convergence parameters
        Iterations = 100,  // maximum number of iterations to perform
        Regularization = 0
    };

    // Now, we can use the learner to finally estimate our model:
    LogisticRegression regression = learner.Learn(input, output);

    // At this point, we can compute the odds ratio of our variables.
    // In the model, the variable at 0 is always the intercept term,
    // with the other following in the sequence. Index 1 is the age
    // and index 2 is whether the patient smokes or not.

    // For the age variable, we have that individuals with
    // higher age have 1.021 greater odds of getting lung
    // cancer controlling for cigarette smoking.
    double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

    // For the smoking/non smoking category variable, however, we
    // have that individuals who smoke have 5.858 greater odds
    // of developing lung cancer compared to those who do not
    // smoke, controlling for age (remember, this is completely
    // fictional and for demonstration purposes only).
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    // We can also obtain confidence intervals for the odd ratios:
    DoubleRange ageRange = regression.GetConfidenceInterval(1);   // { 0.955442466180864, 1.09075592717851 }
    DoubleRange smokeRange = regression.GetConfidenceInterval(2); // { 0.326598216009923, 105.088535240304 }

    // If we would like to use the model to predict a probability for
    // each patient regarding whether they are at risk of cancer or not,
    // we can use the Probability function:
    double[] scores = regression.Probability(input);

    // Finally, if we would like to arrive at a conclusion regarding
    // each patient, we can use the Decide method, which will transform
    // the probabilities (from 0 to 1) into actual true/false values:
    bool[] actual = regression.Decide(input);
    #endregion

    // Reference probabilities the fitted model must reproduce.
    double[] expected =
    {
        0.21044171509541, 0.132425274863516, 0.657478034489772, 0.181224847711481,
        0.747556618035989, 0.614500418479497, 0.331167053803838, 0.144741108525755,
        0.436271096256738, 0.544193832738005
    };

    string str = scores.ToCSharp();

    for (int i = 0; i < scores.Length; i++)
        Assert.AreEqual(expected[i], scores[i], 1e-8);

    // Transform with a pre-allocated result buffer must agree with Probability.
    double[] transform = regression.Transform(input, scores);
    for (int i = 0; i < scores.Length; i++)
        Assert.AreEqual(expected[i], transform[i], 1e-8);

    Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-6);

    // Intercept must equal Coefficients[0].
    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

    Assert.IsTrue(new[] { 0.955442466180864, 1.09075592717851 }.IsEqual(ageRange, atol: 1e-10));
    Assert.IsTrue(new[] { 0.326598216009923, 105.088535240304 }.IsEqual(smokeRange, atol: 1e-10));

    // Decisions for every sample.
    Assert.IsFalse(actual[0]);
    Assert.IsFalse(actual[1]);
    Assert.IsTrue(actual[2]);
    Assert.IsFalse(actual[3]);
    Assert.IsTrue(actual[4]);
    Assert.IsTrue(actual[5]);
    Assert.IsFalse(actual[6]);
    Assert.IsFalse(actual[7]);
    Assert.IsFalse(actual[8]);
    Assert.IsTrue(actual[9]);
}
/// <summary>
///   Exhaustive consistency test for the classifier API surface of a logistic
///   regression learned with sample weights: Scores / LogLikelihoods /
///   Probabilities (plural, per-class), Score / LogLikelihood / Probability
///   (singular), Decide, and every decision-returning overload must all agree.
///   Regression test for https://github.com/accord-net/framework/issues/570.
/// </summary>
public void scores_probabilities_test()
{
    double[][] input =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - have cancer
        new double[] { 86, 1 }, // 1
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    double[] output = { 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1 };

    // The duplicated { 86, 1 } rows carry weight 0.5 each, so together they
    // contribute like the single full-weight row in the unweighted dataset.
    double[] weights = { 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0 };

    var teacher = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Regularization = 0
    };

    var target = teacher.Learn(input, output, weights);

    // A plain logistic regression uses the standard logit link (A = 0, B = 1).
    LogitLinkFunction link = (LogitLinkFunction)target.Link;
    Assert.AreEqual(0, link.A);
    Assert.AreEqual(1, link.B);

    // Weighted fit must reproduce the unweighted reference coefficients.
    Assert.AreEqual(-2.4577464307294092, target.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, target.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, target.Coefficients[1], 1e-8);
    Assert.AreEqual(1.7678893101571855, target.Coefficients[2], 1e-8);

    // Test Scores, LogLikelihoods and Probability functions
    // https://github.com/accord-net/framework/issues/570

    double[][] scoresAllSamples = target.Scores(input);
    double[][] logLikelihoodsAllSamples = target.LogLikelihoods(input);
    double[][] probabilitiesAllSamples = target.Probabilities(input);

    // Scores are the link-function image of the probabilities, log-likelihoods
    // are log-probabilities, and per-sample probabilities sum to one.
    Assert.IsTrue(scoresAllSamples.IsEqual(Matrix.Apply(probabilitiesAllSamples, link.Function), rtol: 1e-5));
    Assert.IsTrue(probabilitiesAllSamples.IsEqual(logLikelihoodsAllSamples.Exp()));
    Assert.IsTrue(probabilitiesAllSamples.Sum(dimension: 1).IsEqual(Vector.Ones(11), rtol: 1e-6));

    bool[] decideAllSamples = target.Decide(input);
    double err = new ZeroOneLoss(output).Loss(decideAllSamples);
    Assert.AreEqual(0.18181818181818182, err, 1e-5); // 2 of 11 misclassified

    // Decisions must match the argmax of every per-class representation.
    Assert.AreEqual(decideAllSamples, scoresAllSamples.ArgMax(dimension: 1).ToBoolean());
    Assert.AreEqual(decideAllSamples.ToInt32(), logLikelihoodsAllSamples.ArgMax(dimension: 1));
    Assert.AreEqual(decideAllSamples, probabilitiesAllSamples.ArgMax(dimension: 1).ToBoolean());

    // Singular variants must equal column 1 (positive class) of the plural ones.
    double[] scoreAllSamples = target.Score(input);
    Assert.AreEqual(scoreAllSamples, scoresAllSamples.GetColumn(1));
    double[] logLikelihoodAllSamples = target.LogLikelihood(input);
    Assert.AreEqual(logLikelihoodAllSamples, logLikelihoodsAllSamples.GetColumn(1));
    double[] probabilityAllSamples = target.Probability(input);
    Assert.AreEqual(probabilityAllSamples, probabilitiesAllSamples.GetColumn(1));

    // One-sample overloads must agree with the batch results, row by row.
    for (int i = 0; i < input.Length; i++)
    {
        double[] scoresOneSample = target.Scores(input[i]);
        Assert.AreEqual(scoresOneSample, scoresAllSamples[i]);

        double[] logLikelihoodsOneSample = target.LogLikelihoods(input[i]);
        Assert.AreEqual(logLikelihoodsOneSample, logLikelihoodsAllSamples[i]);

        double[] probabilitiesOneSample = target.Probabilities(input[i]);
        Assert.AreEqual(probabilitiesOneSample, probabilitiesAllSamples[i]);

        bool decideOneSample = target.Decide(input[i]);
        Assert.AreEqual(decideOneSample, decideAllSamples[i]);

        double scoreOneSample = target.Score(input[i]);
        Assert.AreEqual(scoreOneSample, scoreAllSamples[i]);

        double logLikelihoodOneSample = target.LogLikelihood(input[i]);
        Assert.AreEqual(logLikelihoodOneSample, logLikelihoodAllSamples[i]);

        double probabilityOneSample = target.Probability(input[i]);
        Assert.AreEqual(probabilityOneSample, probabilityAllSamples[i]);
    }

    // Overloads that also emit decisions (ref bool[]) must all agree.
    bool[] decideScoresAllSamples = null;
    target.Scores(input, ref decideScoresAllSamples);
    bool[] decideLogLikelihoodsAllSamples = null;
    target.LogLikelihoods(input, ref decideLogLikelihoodsAllSamples);
    Assert.AreEqual(decideScoresAllSamples, decideLogLikelihoodsAllSamples);
    bool[] decideProbabilitiesAllSamples = null;
    target.Probabilities(input, ref decideProbabilitiesAllSamples);
    Assert.AreEqual(decideScoresAllSamples, decideProbabilitiesAllSamples);
    bool[] decideScoreAllSamples = null;
    target.Score(input, ref decideScoreAllSamples);
    Assert.AreEqual(decideScoreAllSamples, decideScoresAllSamples);
    bool[] decideLogLikelihoodAllSamples = null;
    target.LogLikelihood(input, ref decideLogLikelihoodAllSamples);
    Assert.AreEqual(decideScoreAllSamples, decideLogLikelihoodAllSamples);
    bool[] decideProbabilityAllSamples = null;
    target.Probability(input, ref decideProbabilityAllSamples);
    Assert.AreEqual(decideScoreAllSamples, decideProbabilityAllSamples);

    // One-sample decision-out overloads must match the batch decisions.
    for (int i = 0; i < input.Length; i++)
    {
        bool decideScoresOneSample;
        target.Scores(input[i], out decideScoresOneSample);
        Assert.AreEqual(decideScoresOneSample, decideScoresAllSamples[i]);

        bool decideLogLikelihoodsOneSample;
        target.LogLikelihoods(input[i], out decideLogLikelihoodsOneSample);
        Assert.AreEqual(decideLogLikelihoodsOneSample, decideLogLikelihoodsAllSamples[i]);

        bool decideProbabilitiesOneSample;
        target.Probabilities(input[i], out decideProbabilitiesOneSample);
        Assert.AreEqual(decideProbabilitiesOneSample, decideProbabilitiesAllSamples[i]);

        bool decideScoreOneSample;
        target.Score(input[i], out decideScoreOneSample);
        Assert.AreEqual(decideScoreOneSample, decideScoreAllSamples[i]);

        bool decideLogLikelihoodOneSample;
        target.LogLikelihood(input[i], out decideLogLikelihoodOneSample);
        Assert.AreEqual(decideLogLikelihoodOneSample, decideLogLikelihoodAllSamples[i]);

        bool decideProbabilityOneSample;
        target.Probability(input: input[i], decision: out decideProbabilityOneSample);
        Assert.AreEqual(decideProbabilityOneSample, decideProbabilityAllSamples[i]);
    }

    // The bool[][] (one-hot decision) overloads are intentionally left
    // unexercised here; kept for reference.
    //bool[][] decidesScoresAllSamples = null; target.Scores(input, ref decidesScoresAllSamples);
    //bool[][] decidesLogLikelihoodsAllSamples = null; target.LogLikelihoods(input, ref decidesLogLikelihoodsAllSamples);
    //bool[][] decidesProbabilitiesAllSamples = null; target.Probabilities(input, ref decidesProbabilitiesAllSamples);
    //bool[][] decidesScoreAllSamples = null; target.Score(input, ref decidesScoreAllSamples);
    //bool[][] decidesLogLikelihoodAllSamples = null; target.LogLikelihood(input, ref decidesLogLikelihoodAllSamples);
    //bool[][] decidesProbabilityAllSamples = null; target.Probability(input, ref decidesProbabilityAllSamples);
}
/// <summary>
///   Computes the Logistic Regression Analysis.
/// </summary>
///
/// <remarks>The likelihood surface for the
///   logistic regression learning is convex, so there will be only one
///   peak. Any local maxima will be also a global maxima.
/// </remarks>
///
/// <param name="limit">
///   The difference between two iterations of the regression algorithm
///   when the algorithm should stop. If not specified, the value of
///   10e-4 will be used. The difference is calculated based on the largest
///   absolute parameter change of the regression.
/// </param>
///
/// <param name="maxIterations">
///   The maximum number of iterations to be performed by the regression
///   algorithm.
/// </param>
///
/// <returns>
///   True if the model converged, false otherwise.
/// </returns>
///
public bool Compute(double limit, int maxIterations)
{
    double delta;
    int iteration = 0;

    var learning = new IterativeReweightedLeastSquares(regression);

    do // learning iterations until convergence
    {
        delta = learning.Run(inputData, outputData);
        iteration++;
    } while (delta > limit && iteration < maxIterations);

    // Check if the full model has converged: hitting the iteration cap
    // before the change drops below the limit counts as non-convergence.
    bool converged = iteration < maxIterations;

    // Store model information
    this.result = regression.Compute(inputData);
    this.deviance = regression.GetDeviance(inputData, outputData);
    this.logLikelihood = regression.GetLogLikelihood(inputData, outputData);
    this.chiSquare = regression.ChiSquare(inputData, outputData);

    // Store coefficient information (one entry per coefficient,
    // including the intercept at index 0)
    for (int i = 0; i < regression.Coefficients.Length; i++)
    {
        this.standardErrors[i] = regression.StandardErrors[i];
        this.waldTests[i] = regression.GetWaldTest(i);
        this.coefficients[i] = regression.Coefficients[i];
        this.confidences[i] = regression.GetConfidenceInterval(i);
        this.oddsRatios[i] = regression.GetOddsRatio(i);
    }

    // Perform likelihood-ratio tests against diminished nested models.
    // NOTE(review): the same learner and inner model instance are reused for
    // every dropped column; each fit starts from the previous fit's
    // coefficients rather than a fresh model — presumably intentional as a
    // warm start, but worth confirming.
    LogisticRegression innerModel = new LogisticRegression(inputCount - 1);
    learning = new IterativeReweightedLeastSquares(innerModel);

    for (int i = 0; i < inputCount; i++)
    {
        // Create a diminished inner model without the current variable
        double[][] data = inputData.RemoveColumn(i);

        iteration = 0;

        do // learning iterations until convergence
        {
            delta = learning.Run(data, outputData);
            iteration++;
        } while (delta > limit && iteration < maxIterations);

        // Likelihood-ratio statistic: 2 * (LL_full - LL_reduced),
        // asymptotically chi-square with 1 degree of freedom.
        double ratio = 2.0 * (logLikelihood - innerModel.GetLogLikelihood(data, outputData));

        // ratioTests[0] is presumably reserved for the intercept, so the
        // test for variable i is stored at i + 1 — TODO confirm against the
        // class declaration.
        ratioTests[i + 1] = new ChiSquareTest(ratio, 1);
    }

    // Returns true if the full model has converged, false otherwise.
    return (converged);
}
/// <summary>
///   Fits a probit-link generalized linear model with IRLS and checks the
///   coefficients and standard errors against the reference values from
///   http://bayes.bgsu.edu/bcwr/vignettes/probit_regression.pdf
/// </summary>
public void ComputeTest()
{
    // Example from http://bayes.bgsu.edu/bcwr/vignettes/probit_regression.pdf
    double[][] input =
    {
        new double[] { 525 }, new double[] { 533 }, new double[] { 545 },
        new double[] { 582 }, new double[] { 581 }, new double[] { 576 },
        new double[] { 572 }, new double[] { 609 }, new double[] { 559 },
        new double[] { 543 }, new double[] { 576 }, new double[] { 525 },
        new double[] { 574 }, new double[] { 582 }, new double[] { 574 },
        new double[] { 471 }, new double[] { 595 }, new double[] { 557 },
        new double[] { 557 }, new double[] { 584 }, new double[] { 599 },
        new double[] { 517 }, new double[] { 649 }, new double[] { 584 },
        new double[] { 463 }, new double[] { 591 }, new double[] { 488 },
        new double[] { 563 }, new double[] { 553 }, new double[] { 549 }
    };

    double[] output =
    {
        0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
        1, 0, 1, 1, 0, 1, 0, 1, 1, 1
    };

    var regression = new GeneralizedLinearRegression(new ProbitLinkFunction(), inputs: 1);

    var teacher = new IterativeReweightedLeastSquares(regression);

    // Iterate until the largest parameter change drops below 1e-6.
    // Fixed: the original loop had no iteration cap and could hang the test
    // run forever if IRLS failed to converge; now it stops after at most
    // 1000 iterations and lets the assertions below report the failure.
    double delta;
    int iterations = 0;
    do
    {
        // Perform an iteration
        delta = teacher.Run(input, output);
        iterations++;
    } while (delta > 1e-6 && iterations < 1000);

    // Coefficients: intercept and slope.
    Assert.AreEqual(2, regression.Coefficients.Length);
    Assert.AreEqual(-17.6984, regression.Coefficients[0], 1e-4);
    Assert.AreEqual(0.03293, regression.Coefficients[1], 1e-4);

    // Standard errors for both coefficients.
    Assert.AreEqual(2, regression.StandardErrors.Length);
    Assert.AreEqual(9.2731983954911374, regression.StandardErrors[0], 1e-5);
    Assert.AreEqual(0.016768779446085, regression.StandardErrors[1], 1e-6);
}
/// <summary>
///   Documentation example (doc_learn): extracts fixed-length Bag-Of-Words
///   feature vectors from two texts and trains a logistic regression that
///   distinguishes the two paragraphs.
/// </summary>
public void learn_test()
{
    #region doc_learn
    // The Bag-Of-Words model can be used to extract finite-length feature
    // vectors from sequences of arbitrary length, like for example, texts:

    string[] texts =
    {
        @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas molestie malesuada nisi et placerat. Curabitur blandit porttitor suscipit. Nunc facilisis ultrices felis, vitae luctus arcu semper in. Fusce ut felis ipsum. Sed faucibus tortor ut felis placerat euismod. Vestibulum pharetra velit et dolor ornare quis malesuada leo aliquam. Aenean lobortis, tortor iaculis vestibulum dictum, tellus nisi vestibulum libero, ultricies pretium nisi ante in neque. Integer et massa lectus. Aenean ut sem quam. Mauris at nisl augue, volutpat tempus nisl. Suspendisse luctus convallis metus, vitae pretium risus pretium vitae. Duis tristique euismod aliquam",

        @"Sed consectetur nisl et diam mattis varius. Aliquam ornare tincidunt arcu eget adipiscing. Etiam quis augue lectus, vel sollicitudin lorem. Fusce lacinia, leo non porttitor adipiscing, mauris purus lobortis ipsum, id scelerisque erat neque eget nunc. Suspendisse potenti. Etiam non urna non libero pulvinar consequat ac vitae turpis. Nam urna eros, laoreet id sagittis eu, posuere in sapien. Phasellus semper convallis faucibus. Nulla fermentum faucibus tellus in rutrum. Maecenas quis risus augue, eu gravida massa."
    };

    string[][] words = texts.Tokenize();

    // Create a new BoW with options:
    var codebook = new BagOfWords()
    {
        // (note: "MaximumOccurance" is the actual Accord.NET property name)
        MaximumOccurance = 1 // the resulting vector will have only 0's and 1's
    };

    // Compute the codebook (note: this would have to be done only for the training set)
    codebook.Learn(words);


    // Now, we can use the learned codebook to extract fixed-length
    // representations of the different texts (paragraphs) above:

    // Extract a feature vector from the text 1:
    double[] bow1 = codebook.Transform(words[0]);

    // Extract a feature vector from the text 2:
    double[] bow2 = codebook.Transform(words[1]);

    // we could also have transformed everything at once, i.e.
    // double[][] bow = codebook.Transform(words);


    // Now, since we have finite length representations (both bow1 and bow2 should
    // have the same size), we can pass them to any classifier or machine learning
    // method. For example, we can pass them to a Logistic Regression Classifier to
    // discern between the first and second paragraphs

    // Lets create a Logistic classifier to separate the two paragraphs:
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,  // Let's set some convergence parameters
        Iterations = 100,  // maximum number of iterations to perform
        Regularization = 0
    };

    // Now, we use the learning algorithm to learn the distinction between the two:
    LogisticRegression reg = learner.Learn(new[] { bow1, bow2 }, new[] { false, true });

    // Finally, we can predict using the classifier:
    bool c1 = reg.Decide(bow1); // Should be false
    bool c2 = reg.Decide(bow2); // Should be true
    #endregion

    // Both paragraphs share a 99-word vocabulary; the sums count how many
    // distinct codebook words each text actually contains.
    Assert.AreEqual(bow1.Length, 99);
    Assert.AreEqual(bow2.Length, 99);
    Assert.AreEqual(bow1.Sum(), 67);
    Assert.AreEqual(bow2.Sum(), 63);
    Assert.IsFalse(c1);
    Assert.IsTrue(c2);
}
/// <summary>
///   Fits a logistic regression with the non-generic IRLS teacher (explicit
///   Run loop, Regularization = 0) and pins odds ratios, per-sample computed
///   probabilities (exact double equality), and coefficients.
/// </summary>
public void ComputeTest()
{
    // Suppose we have the following data about some patients.
    // The first variable is continuous and represent patient
    // age. The second variable is dichotomic and give whether
    // they smoke or not (This is completely fictional data).
    double[][] input =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - have cancer
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    // We also know if they have had lung cancer or not, and
    // we would like to know whether smoking has any connection
    // with lung cancer (This is completely fictional data).
    double[] output =
    {
        0, 0, 0, 1, 1, 1, 0, 0, 0, 1
    };


    // To verify this hypothesis, we are going to create a logistic
    // regression model for those two inputs (age and smoking).
    LogisticRegression regression = new LogisticRegression(inputs: 2);

    // Next, we are going to estimate this model. For this, we
    // will use the Iteratively Reweighted Least Squares method.
    var teacher = new IterativeReweightedLeastSquares(regression);

    teacher.Regularization = 0;

    // Now, we will iteratively estimate our model. The Run method returns
    // the maximum relative change in the model parameters and we will use
    // it as the convergence criteria.

    double delta = 0;
    do
    {
        // Perform an iteration
        delta = teacher.Run(input, output);
    } while (delta > 0.001);

    // At this point, we can compute the odds ratio of our variables.
    // In the model, the variable at 0 is always the intercept term,
    // with the other following in the sequence. Index 1 is the age
    // and index 2 is whether the patient smokes or not.

    // For the age variable, we have that individuals with
    // higher age have 1.021 greater odds of getting lung
    // cancer controlling for cigarette smoking.
    double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

    // For the smoking/non smoking category variable, however, we
    // have that individuals who smoke have 5.858 greater odds
    // of developing lung cancer compared to those who do not
    // smoke, controlling for age (remember, this is completely
    // fictional and for demonstration purposes only).
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    // Compute the fitted probability for every training sample.
    double[] actual = new double[output.Length];
    for (int i = 0; i < input.Length; i++)
        actual[i] = regression.Compute(input[i]);

    // Exact reference probabilities; the asserts below deliberately use
    // exact double equality (no tolerance) to pin bit-level reproducibility.
    double[] expected =
    {
        0.21044171560168326,
        0.13242527535212373,
        0.65747803433771812,
        0.18122484822324372,
        0.74755661773156912,
        0.61450041841477232,
        0.33116705418194975,
        0.14474110902457912,
        0.43627109657399382,
        0.54419383282533118
    };

    for (int i = 0; i < actual.Length; i++)
        Assert.AreEqual(expected[i], actual[i]);

    Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-8);
    Assert.IsFalse(double.IsNaN(ageOdds));
    Assert.IsFalse(double.IsNaN(smokeOdds));

    // Intercept must equal Coefficients[0].
    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);
}
/// <summary>
///   Fits a logistic regression on the fictional age/smoking dataset with the
///   default IRLS teacher and checks the exact odds ratios for both variables.
/// </summary>
public void ComputeTest()
{
    // Fictional patient data: the first column is the (continuous) patient
    // age; the second is a dichotomic flag indicating whether they smoke.
    double[][] input =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - have cancer
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    // Known outcome per patient: whether they developed lung cancer.
    // (This is completely fictional, for demonstration purposes only.)
    double[] output = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

    // Build a logistic regression model over the two input variables.
    var model = new LogisticRegression(inputs: 2);

    // Estimate it using Iteratively Reweighted Least Squares. Run returns
    // the largest relative parameter change, which serves as the stopping
    // criterion: keep iterating until it falls to 0.001 or below.
    var learner = new IterativeReweightedLeastSquares(model);

    for (double change = double.MaxValue; change > 0.001; )
    {
        // Perform one IRLS iteration.
        change = learner.Run(input, output);
    }

    // Index 0 of the model is the intercept; index 1 is age and index 2 is
    // the smoking flag. Individuals with higher age have 1.021 greater odds
    // of getting lung cancer, controlling for cigarette smoking.
    double ageOdds = model.GetOddsRatio(1); // 1.0208597028836701

    // Smokers have 5.858 greater odds of developing lung cancer than
    // non-smokers, controlling for age.
    double smokeOdds = model.GetOddsRatio(2); // 5.8584748789881331

    Assert.AreEqual(1.0208597028836701, ageOdds);
    Assert.AreEqual(5.8584748789881331, smokeOdds);
}
/// <summary>
///   Uses data from <paramref name="fileName">fileName</paramref> to train a logistic regression model.
/// </summary>
/// <param name="fileName">The name of the data file (without extension), located under
///   "Logistic Regression Model/data/".</param>
/// <returns>A string to print giving information about the weights, odds ratios and loss values.</returns>
public static string Learn(string fileName)
{
    // Read all inputs and outputs from the training file.
    // Each line has the form "<id>:<score1>&<score2>&...:<label>".
    string[] lines = File.ReadAllLines("Logistic Regression Model/data/" + fileName + ".txt");
    double[][] inputs = new double[lines.Length][];
    int[] outputs = new int[lines.Length];
    for (int a = 0; a < lines.Length; a++)
    {
        string[] split = lines[a].Split(':');

        // Dynamically get variables from file.
        string[] scores = split[1].Split('&');
        inputs[a] = new double[scores.Length];
        for (int b = 0; b < scores.Length; b++)
        {
            // Fixed: parse machine-written numbers with the invariant culture
            // so training does not break on comma-decimal locales.
            inputs[a][b] = double.Parse(scores[b], System.Globalization.CultureInfo.InvariantCulture);
        }
        outputs[a] = int.Parse(split[2], System.Globalization.CultureInfo.InvariantCulture);
    }

    // Set up the Accord.NET learner.
    IterativeReweightedLeastSquares<LogisticRegression> learner =
        new IterativeReweightedLeastSquares<LogisticRegression>()
        {
            Tolerance = 1e-4,
            MaxIterations = 100,
            Regularization = 1e-10
        };

    // Shuffle the input and output pairs to eliminate some inherent bias from
    // the ordering of the training data. (The dictionary keys are the input
    // arrays themselves, compared by reference, so no pairs collide.)
    Dictionary<double[], int> map = inputs
        .Zip(outputs, (arg1, arg2) => new { arg1, arg2 })
        .ToDictionary(x => x.arg1, x => x.arg2);
    map.Shuffle();
    inputs = map.Keys.ToArray();
    outputs = map.Values.ToArray();

    // Train the regression.
    LogisticRegression regression = learner.Learn(inputs, outputs.ToBoolArray());

    // Pick the first unused model file name.
    int counter = 0;
    while (File.Exists("Logistic Regression Model/models/Model-" + counter + ".txt"))
    {
        counter++;
    }

    // Start the report with the weights.
    string result = "Weights: " + regression.Weights.GetString() + "\n";

    // Fixed: the writer and stream were closed/disposed manually, leaking
    // both if any write threw; `using` guarantees disposal on all paths.
    using (FileStream fs = File.Create("Logistic Regression Model/models/Model-" + counter + ".txt"))
    using (StreamWriter writer = new StreamWriter(fs))
    {
        // Write the weights (with the intercept appended) and the odds ratios.
        // NOTE(review): GetOddsRatio is called starting at index 0, which in
        // Accord is the intercept term — confirm this offset is intended.
        writer.WriteLine(regression.Weights.Append(regression.Intercept).ToArray().GetString());
        for (int c = 0; c < regression.Weights.Length; c++)
        {
            writer.WriteLine(regression.GetOddsRatio(c));
            result += "Odds Ratio " + c + ": " + regression.GetOddsRatio(c) + "\n";
        }

        // Collect predicted probabilities and expected labels for the losses.
        double[] actual = new double[inputs.Length];
        double[] expected = new double[outputs.Length];
        for (int a = 0; a < actual.Length; a++)
        {
            actual[a] = regression.Probability(inputs[a]);
            expected[a] = outputs[a];
        }

        // Calculate and record the root-mean-square loss.
        string loss = "Loss: " + new SquareLoss(expected) { Mean = true, Root = true }.Loss(actual);
        result += loss + "\n";
        writer.WriteLine(loss);
        Console.WriteLine("\n\n" + loss);

        // Calculate and record the R-squared loss.
        string r2 = "R2: " + new RSquaredLoss(inputs[0].Length, expected).Loss(actual);
        result += r2;
        writer.WriteLine(r2);
    }

    Console.WriteLine("Model trained successfully!");
    Console.WriteLine("\nEvaluating...\n");

    // Compute and log the variance inflation factors for the inputs.
    float[] VIFs = CalculateVIFs(inputs);
    for (int a = 0; a < VIFs.Length; a++)
    {
        Logger.Log("Variance Inflation Factor #" + a + ": " + VIFs[a]);
    }

    return result;
}
/// <summary>
/// Trains a logistic regression on the sampled cells and writes the resulting
/// per-cell probability surface to a new GeoTIFF raster named <c>ResultLayerName</c>.
/// </summary>
/// <param name="samplePoints">Sampled cells providing inputs (driver-layer values) and labels (cell type).</param>
private void regression(List <Cell> samplePoints)
{
    // Number of samples.
    int COUNT = samplePoints.Count;

    // Build the input and output data sets: one row of driver-layer values
    // per sampled cell, labelled by the cell's type.
    double[][] inputs = new double[COUNT][];
    bool[] outputs = new bool[COUNT];
    for (int i = 0; i < COUNT; i++)
    {
        Cell cell = samplePoints[i];
        int pos = cell.row * width + cell.col;
        inputs[i] = (from buffer in driveBuffers select buffer[pos]).ToArray <double>();
        outputs[i] = cell.type;
    }

    var learner = new IterativeReweightedLeastSquares <Accord.Statistics.Models.Regression.LogisticRegression>()
    {
        Tolerance = 1e-8,            // convergence tolerance
        Iterations = 20,             // maximum number of iterations
        Regularization = 0,
        ComputeStandardErrors = true
    };

    Accord.Statistics.Models.Regression.LogisticRegression regression = learner.Learn(inputs, outputs);

    // Report the odds ratios (index 0 is the intercept term, hence <=).
    StringBuilder strb = new StringBuilder();
    for (int i = 0; i <= inputs[0].Length; i++)
    {
        strb.AppendLine(" " + i + " : " + regression.GetOddsRatio(i));
    }
    updateConsoleEvent(strb.ToString());

    // Report the weight coefficients (the Chinese literals below are
    // user-facing runtime strings and are intentionally left untranslated).
    StringBuilder strw = new StringBuilder();
    strw.AppendLine("权重系数:");
    strw.AppendLine("截距: " + regression.Intercept.ToString());
    var weights = regression.Weights;
    for (int i = 0; i < weights.Length; i++)
    {
        strw.AppendLine("权重" + (i + 1) + ":" + weights[i]);
    }
    updateConsoleEvent(strw.ToString());

    // Evaluate the fitted model at every valid raster cell (row-major).
    double[] result = new double[width * height];
    double minProp = double.MaxValue;
    double[] minInput = null;  // input of the lowest-probability cell (tracked but otherwise unused here)
    for (int row = 0; row < height; row++)
    {
        for (int col = 0; col < width; col++)
        {
            int pos = row * width + col;
            if (beginBuffer[pos] < 0 || !IsValid(pos))
            {
                // Cells outside the study area keep the "no data" land-use value.
                result[pos] = this.landUse.NullInfo.LandUseTypeValue;
                continue;
            }
            double[] input = (from buffer in driveBuffers select buffer[pos]).ToArray <double>();
            double prop = regression.Probability(input);
            if (prop < minProp)
            {
                minProp = prop;
                minInput = input;
            }
            result[pos] = prop;
        }
    }

    // Create a new single-band float64 GDAL GeoTIFF and write the probability raster.
    OSGeo.GDAL.Driver driver = OSGeo.GDAL.Gdal.GetDriverByName("GTIFF");
    OSGeo.GDAL.Dataset dataset = driver.Create(this.ResultLayerName, width, height, 1, OSGeo.GDAL.DataType.GDT_Float64, null);
    dataset.WriteRaster(0, 0, width, height, result, width, height, 1, new int[1] { 1 }, 0, 0, 0);
    dataset.FlushCache();
}
public double[] GetResult() { // 采样 List <Cell> samplePoints = getSample(this.NumberOfSample); // 样本数目 int COUNT = samplePoints.Count; // 构造输入和输出数据集 double[][] inputs = new double[COUNT][]; bool[] outputs = new bool[COUNT]; for (int i = 0; i < COUNT; i++) { Cell cell = samplePoints[i]; int pos = cell.row * width + cell.col; inputs[i] = (from buffer in driveBuffers select buffer[pos]).ToArray <double>(); outputs[i] = cell.type; } var learner = new IterativeReweightedLeastSquares <Accord.Statistics.Models.Regression.LogisticRegression>() { Tolerance = 1e-8, // 收敛参数 Iterations = 20, // 最大循环数目 Regularization = 0, ComputeStandardErrors = true }; Accord.Statistics.Models.Regression.LogisticRegression regression = learner.Learn(inputs, outputs); //// 输出 odds //StringBuilder strb = new StringBuilder(); //for (int i = 0; i <= inputs[0].Length; i++) //{ // strb.AppendLine(" " + i + " : " + regression.GetOddsRatio(i)); //} //updateConsoleEvent(strb.ToString()); //// 输出 weights //StringBuilder strw = new StringBuilder(); //strw.AppendLine("权重系数:"); //strw.AppendLine("截距: " + regression.Intercept.ToString()); //var weights = regression.Weights; //for (int i = 0; i < weights.Length; i++) //{ // strw.AppendLine("权重" + (i + 1) + ":" + weights[i]); //} //updateConsoleEvent(strw.ToString()); double[] result = new double[width * height]; double minProp = double.MaxValue; double[] minInput = null; for (int row = 0; row < height; row++) { for (int col = 0; col < width; col++) { int pos = row * width + col; if (beginBuffer[pos] < 0 || !IsValid(pos)) { result[pos] = this.landUse.NullInfo.LandUseTypeValue; continue; } double[] input = (from buffer in driveBuffers select buffer[pos]).ToArray <double>(); double prop = regression.Probability(input); if (prop < minProp) { minProp = prop; minInput = input; } result[pos] = prop; } } return(result); }
/// <summary>
/// Demonstrates and verifies logistic regression learning via Iteratively
/// Reweighted Least Squares on a small fictional data set (age and smoking
/// status vs. lung cancer), checking odds ratios, coefficients, probability
/// scores and decisions against known reference values.
/// </summary>
public void learn_new_mechanism()
{
    Accord.Math.Random.Generator.Seed = 0;

    #region doc_log_reg_1
    // Suppose we have the following data about some patients.
    // The first variable is continuous and represent patient
    // age. The second variable is dichotomic and give whether
    // they smoke or not (This is completely fictional data).

    // We also know if they have had lung cancer or not, and
    // we would like to know whether smoking has any connection
    // with lung cancer (This is completely fictional data).
    double[][] input =
    {              // age, smokes?, had cancer?
        new double[] { 55, 0 }, // false - no cancer
        new double[] { 28, 0 }, // false
        new double[] { 65, 1 }, // false
        new double[] { 46, 0 }, // true  - had cancer
        new double[] { 86, 1 }, // true
        new double[] { 56, 1 }, // true
        new double[] { 85, 0 }, // false
        new double[] { 33, 0 }, // false
        new double[] { 21, 1 }, // false
        new double[] { 42, 1 }, // true
    };

    bool[] output = // Whether each patient had lung cancer or not
    {
        false, false, false, true, true, true, false, false, false, true
    };

    // To verify this hypothesis, we are going to create a logistic
    // regression model for those two inputs (age and smoking), learned
    // using a method called "Iteratively Reweighted Least Squares":

    var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
    {
        Tolerance = 1e-4,  // Let's set some convergence parameters
        Iterations = 100,  // maximum number of iterations to perform
        Regularization = 0
    };

    // Now, we can use the learner to finally estimate our model:
    LogisticRegression regression = learner.Learn(input, output);

    // At this point, we can compute the odds ratio of our variables.
    // In the model, the variable at 0 is always the intercept term,
    // with the other following in the sequence. Index 1 is the age
    // and index 2 is whether the patient smokes or not.

    // For the age variable, we have that individuals with
    // higher age have 1.021 greater odds of getting lung
    // cancer controlling for cigarette smoking.
    double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

    // For the smoking/non smoking category variable, however, we
    // have that individuals who smoke have 5.858 greater odds
    // of developing lung cancer compared to those who do not
    // smoke, controlling for age (remember, this is completely
    // fictional and for demonstration purposes only).
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    // If we would like to use the model to predict a probability for
    // each patient regarding whether they are at risk of cancer or not,
    // we can use the Probability function:
    double[] scores = regression.Probability(input);

    // Finally, if we would like to arrive at a conclusion regarding
    // each patient, we can use the Decide method, which will transform
    // the probabilities (from 0 to 1) into actual true/false values:
    bool[] actual = regression.Decide(input);
    #endregion

    double[] expected =
    {
        0.26653094409723, 0.152638465629209, 1.91952079193046,
        0.221336525913065, 2.96128427776555, 1.59403653839456,
        0.495141657849358, 0.169236601885844, 0.773902301904016,
        1.1939150275367
    };

    // (Fix: removed the unused local `string str = scores.ToCSharp();`
    // that served no purpose in the test.)
    for (int i = 0; i < scores.Length; i++)
    {
        Assert.AreEqual(expected[i], scores[i], 1e-8);
    }

    // Transform with a preallocated result buffer must agree with Probability.
    double[] transform = regression.Transform(input, scores);
    for (int i = 0; i < scores.Length; i++)
    {
        Assert.AreEqual(expected[i], transform[i], 1e-8);
    }

    Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-6);
    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

    Assert.IsFalse(actual[0]);
    Assert.IsFalse(actual[1]);
    Assert.IsTrue(actual[2]);
    Assert.IsFalse(actual[3]);
    Assert.IsTrue(actual[4]);
    Assert.IsTrue(actual[5]);
    Assert.IsFalse(actual[6]);
    Assert.IsFalse(actual[7]);
    Assert.IsFalse(actual[8]);
    Assert.IsTrue(actual[9]);
}
public void ComputeTest3() { double[][] input = { new double[] { 55, 0 }, // 0 - no cancer new double[] { 28, 0 }, // 0 new double[] { 65, 1 }, // 0 new double[] { 46, 0 }, // 1 - have cancer new double[] { 86, 1 }, // 1 new double[] { 86, 1 }, // 1 new double[] { 56, 1 }, // 1 new double[] { 85, 0 }, // 0 new double[] { 33, 0 }, // 0 new double[] { 21, 1 }, // 0 new double[] { 42, 1 }, // 1 }; double[] output = { 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1 }; double[] weights = { 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0 }; LogisticRegression regression = new LogisticRegression(inputs: 2); var teacher = new IterativeReweightedLeastSquares(regression); teacher.Regularization = 0; double delta = 0; do { delta = teacher.Run(input, output, weights); } while (delta > 0.001); double ageOdds = regression.GetOddsRatio(1); double smokeOdds = regression.GetOddsRatio(2); Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10); Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-8); Assert.IsFalse(double.IsNaN(ageOdds)); Assert.IsFalse(double.IsNaN(smokeOdds)); Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8); Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8); Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-8); Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8); }
public static void Execute() { double[][] input = { new double[] { 55, 0 }, new double[] { 28, 0 }, new double[] { 65, 0 }, new double[] { 46, 0 }, new double[] { 86, 0 }, new double[] { 56, 0 }, new double[] { 85, 0 }, new double[] { 33, 0 }, new double[] { 21, 0 }, new double[] { 42, 0 }, }; double[] output = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 }; LogisticRegression regression = new LogisticRegression(2); var trainer = new IterativeReweightedLeastSquares(regression); double delta = 0; do { // Perform an iteration delta = trainer.Run(input, output); } while (delta > 0.001); var b1 = regression.Coefficients[1]; var b2 = regression.Coefficients[2]; var b0 = regression.Intercept; System.Console.WriteLine(b0); System.Console.WriteLine(b1); System.Console.WriteLine(b2); var func = new Func <double, double, double>((x1, x2) => { var result = 1 / (1 + Math.Exp(-b0 - b1 * x1 - b2 * x2)); return(result); }); var age = 79; var smoking = 0; var r = func(age, smoking); System.Console.WriteLine("input x [age:{0}, smoking:{1}] is {2}", age, smoking, r); LogisticRegression LR = new LogisticRegression(); LR.NumberOfInputs = 1; var learner = new IterativeReweightedLeastSquares <LogisticRegression>() { Tolerance = 1e-4, // Let's set some convergence parameters Iterations = 100, // maximum number of iterations to perform Regularization = 0 }; LR = learner.Learn(input, output); System.Console.WriteLine(LR.Intercept); System.Console.WriteLine(LR.GetOddsRatio(1) - 1); System.Console.WriteLine(LR.GetOddsRatio(2) - 1); double [] test = new double[] { 79, 0 }; System.Console.WriteLine(LR.Probability(test)); }
/// <summary>
/// Regression test: IRLS on (near-)separable data should drive the training
/// error to zero and reproduce the known large coefficient values.
/// </summary>
public void LargeCoefficientsTest()
{
    // Columns: x1, x2, x3, outcome (0/1).
    double[,] data =
    {
        { 48, 1, 4.40, 0 }, { 60, 0, 7.89, 1 }, { 51, 0, 3.48, 0 }, { 66, 0, 8.41, 1 },
        { 40, 1, 3.05, 0 }, { 44, 1, 4.56, 0 }, { 80, 0, 6.91, 1 }, { 52, 0, 5.69, 0 },
        { 58, 0, 4.01, 0 }, { 58, 0, 4.48, 0 }, { 72, 1, 5.97, 0 }, { 57, 0, 6.71, 1 },
        { 55, 1, 5.36, 0 }, { 71, 0, 5.68, 0 }, { 44, 1, 4.61, 0 }, { 65, 1, 4.80, 0 },
        { 38, 0, 5.06, 0 }, { 50, 0, 6.40, 0 }, { 80, 0, 6.67, 1 }, { 69, 1, 5.79, 0 },
        { 39, 0, 5.42, 0 }, { 68, 0, 7.61, 1 }, { 47, 1, 3.24, 0 }, { 45, 1, 4.29, 0 },
        { 79, 1, 7.44, 1 }, { 41, 1, 4.60, 0 }, { 45, 0, 5.91, 0 }, { 54, 0, 4.77, 0 },
        { 43, 1, 5.62, 0 }, { 62, 1, 7.92, 1 }, { 72, 1, 7.92, 1 }, { 57, 1, 6.19, 0 },
        { 39, 1, 2.37, 0 }, { 51, 0, 5.84, 0 }, { 73, 1, 5.94, 0 }, { 41, 1, 3.82, 0 },
        { 35, 0, 2.35, 0 }, { 69, 0, 6.57, 1 }, { 75, 1, 7.96, 1 }, { 51, 1, 3.96, 0 },
        { 61, 1, 4.36, 0 }, { 55, 0, 3.84, 0 }, { 45, 1, 3.02, 0 }, { 48, 0, 4.65, 0 },
        { 77, 0, 7.93, 1 }, { 40, 1, 2.46, 0 }, { 37, 1, 2.32, 0 }, { 78, 0, 7.88, 1 },
        { 39, 1, 4.55, 0 }, { 41, 0, 2.45, 0 }, { 54, 1, 5.62, 0 }, { 59, 1, 5.03, 0 },
        { 78, 0, 8.08, 1 }, { 56, 1, 6.96, 1 }, { 49, 1, 3.07, 0 }, { 48, 0, 4.75, 0 },
        { 63, 1, 5.64, 0 }, { 50, 0, 3.35, 0 }, { 59, 1, 5.08, 0 }, { 60, 0, 6.58, 1 },
        { 64, 0, 5.19, 0 }, { 76, 1, 6.69, 1 }, { 58, 0, 5.18, 0 }, { 48, 1, 4.47, 0 },
        { 72, 0, 8.70, 1 }, { 40, 1, 5.14, 0 }, { 53, 0, 3.40, 0 }, { 79, 0, 9.77, 1 },
        { 61, 1, 7.79, 1 }, { 59, 0, 7.42, 1 }, { 44, 0, 2.55, 0 }, { 52, 1, 3.71, 0 },
        { 80, 1, 7.56, 1 }, { 76, 0, 7.80, 1 }, { 51, 0, 5.94, 0 }, { 46, 1, 5.52, 0 },
        { 48, 0, 3.25, 0 }, { 58, 1, 4.71, 0 }, { 44, 1, 2.52, 0 }, { 68, 0, 8.38, 1 },
    };

    double[][] input = data.Submatrix(null, 0, 2).ToArray();
    double[] output = data.GetColumn(3);

    LogisticRegression regression = new LogisticRegression(3);

    var teacher = new IterativeReweightedLeastSquares(regression);
    teacher.Regularization = 1e-10;

    // Run many iterations: on (near-)separable data the coefficients keep
    // growing, which is exactly the behavior this test pins down.
    var errors = new List <double>();
    for (int i = 0; i < 1000; i++)
    {
        errors.Add(teacher.Run(input, output));
    }

    // The model should classify every training sample correctly.
    double error = 0;
    for (int i = 0; i < output.Length; i++)
    {
        double expected = output[i];
        double actual = System.Math.Round(regression.Compute(input[i]));
        if (expected != actual)
        {
            error++;
        }
    }

    error /= output.Length;

    // Fix: expected value goes first in Assert.AreEqual, and floating-point
    // comparisons must use a tolerance instead of exact equality.
    Assert.AreEqual(0, error);
    Assert.AreEqual(-355.59378247276379, regression.Coefficients[0], 1e-7);
    Assert.AreEqual(1.2646432605797491, regression.Coefficients[1], 1e-7);
    Assert.AreEqual(-10.710529810144157, regression.Coefficients[2], 1e-7);
    Assert.AreEqual(44.089493151268726, regression.Coefficients[3], 1e-7);
}
/// <summary>
/// Run the lesson: load the California housing data, train a logistic
/// regression classifier for the "median_high_house_value" label, and report
/// a probability histogram, confusion-matrix metrics, and an ROC curve on
/// the validation partition.
/// </summary>
public static void Run()
{
    // get data
    Console.WriteLine("Loading data....");
    var path = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\..\california_housing.csv"));
    var housing = Frame.ReadCsv(path, separators: ",");
    // drop the rows capped at the $500k value ceiling
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // create the median_high_house_value label: 1.0 when value >= $265k, else 0.0
    housing.AddColumn("median_high_house_value",
                      housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

    // shuffle the frame (note: unseeded Random makes the split non-reproducible)
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortRowsByKey();

    // create training, validation, and test frames
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)];

    // build the list of features we're going to use
    var columns = new string[] {
        "latitude",
        "longitude",
        "housing_median_age",
        "total_rooms",
        "total_bedrooms",
        "population",
        "households",
        "median_income"
    };

    // train the model using a logistic regressor
    var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
    {
        MaxIterations = 100
    };
    var regression = learner.Learn(
        training.Columns[columns].ToArray2D <double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // get predicted probabilities on the validation partition
    var features_validation = validation.Columns[columns].ToArray2D <double>().ToJagged();
    var label_validation = validation["median_high_house_value"].Values.ToArray();
    var probabilities = regression.Probability(features_validation);

    // calculate the histogram of probabilities with 0.05-wide bins
    var histogram = new Histogram();
    histogram.Compute(probabilities, 0.05);

    // draw the histogram
    Plot(histogram, "Probability histogram", "prediction", "count");

    // get predictions and actuals
    var predictions = regression.Decide(features_validation);
    var actuals = label_validation.Select(v => v == 1.0 ? true : false).ToArray();

    // create confusion matrix
    var confusion = new ConfusionMatrix(predictions, actuals);

    // display classification scores
    Console.WriteLine($"True Positives: {confusion.TruePositives}");
    Console.WriteLine($"True Negatives: {confusion.TrueNegatives}");
    Console.WriteLine($"False Positives: {confusion.FalsePositives}");
    Console.WriteLine($"False Negatives: {confusion.FalseNegatives}");
    Console.WriteLine();

    // display accuracy, precision, and recall
    Console.WriteLine($"Accuracy: {confusion.Accuracy}");
    Console.WriteLine($"Precision: {confusion.Precision}");
    Console.WriteLine($"Recall: {confusion.Recall}");
    Console.WriteLine();

    // display TPR and FPR
    Console.WriteLine($"TPR: {confusion.Sensitivity}");
    Console.WriteLine($"FPR: {confusion.FalsePositiveRate}");
    Console.WriteLine();

    // calculate roc curve
    // NOTE(review): the curve is built from hard 0/1 predictions rather than
    // the probability scores, which collapses it to a single operating point;
    // consider passing `probabilities` instead — confirm intent.
    var roc = new ReceiverOperatingCharacteristic(
        actuals,
        predictions.Select(v => v ? 1 : 0).ToArray());
    roc.Compute(100);

    // generate the scatter plot
    var rocPlot = roc.GetScatterplot(true);

    // show roc curve
    Plot(rocPlot);

    // show the auc
    Console.WriteLine($"AUC: {roc.Area}");
}
/// <summary>
/// The main application entry point: loads the California housing data,
/// builds a 12x12 one-hot longitude/latitude feature cross, then trains a
/// logistic regression for "median_high_house_value" twice — without and
/// with L2 regularization — plotting a weight histogram for each run to show
/// how regularization shrinks the weights.
/// </summary>
/// <param name="args">Command line arguments.</param>
public static void Main(string[] args)
{
    // get data
    Console.WriteLine("Loading data....");
    var path = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\..\california_housing.csv"));
    var housing = Frame.ReadCsv(path, separators: ",");
    // drop the rows capped at the $500k value ceiling
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // shuffle the frame (note: unseeded Random makes the partitions non-reproducible)
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortRowsByKey();

    // create the median_high_house_value label: 1.0 when value >= $265k, else 0.0
    housing.AddColumn("median_high_house_value",
                      housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

    // create one-hot vectors for longitude and latitude
    // NOTE(review): assumes Bin(...) and Vector.Create yield 12 slots per
    // axis to match the 12x12 loops below — confirm against Bin's definition.
    Console.WriteLine("Binning longitude and latitude...");
    var vectors_long =
        from l in housing["longitude"].Values
        select Vector.Create <double>(
            1,
            (from b in Bin(-125, -114) select l >= b.Min && l < b.Max).ToArray());
    var vectors_lat =
        from l in housing["latitude"].Values
        select Vector.Create <double>(
            1,
            (from b in Bin(32, 43) select l >= b.Min && l < b.Max).ToArray());

    // multiply the two one-hot vectors (outer product) and add one frame
    // column per cell of the 12x12 location cross
    Console.WriteLine("Creating longxlat feature cross...");
    var vectors_cross = vectors_long.Zip(vectors_lat, (lng, lat) => lng.Outer(lat));
    for (var i = 0; i < 12; i++)
    {
        for (var j = 0; j < 12; j++)
        {
            housing.AddColumn($"location {i},{j}", from v in vectors_cross select v[i, j]);
        }
    }

    // set up model columns: the 144 location-cross columns plus the numeric features
    var columns = (from i in Enumerable.Range(0, 12)
                   from j in Enumerable.Range(0, 12)
                   select $"location {i},{j}").ToList();
    columns.Add("housing_median_age");
    columns.Add("total_rooms");
    columns.Add("total_bedrooms");
    columns.Add("population");
    columns.Add("households");
    columns.Add("median_income");

    // create training, validation, and test partitions
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)];

    ////////////////////////////////////////////////////////////////////////
    // Without regularization
    ////////////////////////////////////////////////////////////////////////

    // train the model
    Console.WriteLine("Training model without regularization...");
    var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
    {
        MaxIterations = 50,
        Regularization = 0
    };
    var regression = learner.Learn(
        training.Columns[columns].ToArray2D <double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // display training results
    Console.WriteLine("TRAINING WITHOUT REGULARIZATION");
    Console.WriteLine($"Weights: {regression.Weights.ToString<double>("0.00")}");
    Console.WriteLine($"Intercept: {regression.Intercept}");
    Console.WriteLine();

    // plot a histogram of the nonzero weights
    var histogram = new Histogram();
    histogram.Compute(regression.Weights, 1.0); // set to 1.0 when regularization is disabled

    // draw the histogram
    Plot(histogram, "Without Regularization", "prediction", "count");

    ////////////////////////////////////////////////////////////////////////
    // With regularization
    ////////////////////////////////////////////////////////////////////////

    // train the model
    Console.WriteLine("Training model with regularization...");
    learner = new IterativeReweightedLeastSquares <LogisticRegression>()
    {
        MaxIterations = 50,
        Regularization = 50
    };
    regression = learner.Learn(
        training.Columns[columns].ToArray2D <double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // display training results
    Console.WriteLine("TRAINING WITH REGULARIZATION");
    Console.WriteLine($"Weights: {regression.Weights.ToString<double>("0.00")}");
    Console.WriteLine($"Intercept: {regression.Intercept}");
    Console.WriteLine();

    // plot a histogram of the nonzero weights; narrower 0.1 bins here because
    // regularization shrinks the weights toward zero
    histogram = new Histogram();
    histogram.Compute(regression.Weights, 0.1); // set to 1.0 when regularization is disabled

    // draw the histogram
    Plot(histogram, "With Regularization", "prediction", "count");
    Console.ReadLine();
}
// Start is called before the first frame update void Start() { timeScale = 1; timeScaleAnt = timeScale; timesRessurect = 0; nLaps = -1; timeLap = 0; // Generating POD parameters CarInstantiate(); // Initializing Decision Trees var teacherT = new IterativeReweightedLeastSquares <LogisticRegression>() { MaxIterations = 100, Regularization = 1e-6 }; var teacherS = new IterativeReweightedLeastSquares <LogisticRegression>() { MaxIterations = 100, Regularization = 1e-6 }; dataSizeSt = 1; dataSizeTh = 1; InpThrust = new double[dataSizeTh][]; InpThrust[0] = new double[4]; OutThrust = new int[1]; OutThrust[0] = 1; InpSteer = new double[dataSizeSt][]; InpSteer[0] = new double[5]; OutSteer = new int[1]; OutSteer[0] = 0; // Use the learning algorithm to induce the tree double[][] inputsT0 = new double[1][]; inputsT0[0] = new double[4]; inputsT0[0][0] = 0.5f; inputsT0[0][1] = 10f; inputsT0[0][2] = 0.5f; inputsT0[0][3] = 65f; double[][] inputsS0 = new double[1][]; inputsS0[0] = new double[5]; inputsS0[0][0] = 0.5f; inputsS0[0][1] = 10f; inputsS0[0][2] = 0.5f; inputsS0[0][3] = 65f; inputsS0[0][4] = 1f; int[] outputs0 = new int[1]; outputs0[0] = 1; int[] outputs1 = new int[1]; outputs1[0] = 0; for (int i = 0; i < 4; i++) { InpThrust[0][i] = i; } for (int i = 0; i < 5; i++) { InpSteer[0][i] = i; } decisionThrust = teacherT.Learn(inputsT0, outputs0); decisionSteer = teacherS.Learn(inputsS0, outputs1); }