/// <summary>
/// Performs a linear logistic regression analysis.
/// </summary>
/// <param name="outputIndex">The index of the column to predict.</param>
/// <returns>The fit result: the maximum-likelihood parameter estimates and their covariance matrix.</returns>
/// <remarks>Logistic linear regression is suited to situations where multiple input variables, either continuous or binary indicators, are used to predict
/// the value of a binary output variable. Like a linear regression, a logistic linear regression tries to find a model that predicts the output variable using
/// a linear combination of input variables. Unlike a simple linear regression, the model does not assume that this linear
/// function predicts the output directly; instead it assumes that this function value is then fed into a logit link function, which
/// maps the real numbers into the interval (0, 1), and interprets the value of this link function as the probability of obtaining success value
/// for the output variable.</remarks>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="outputIndex"/> is not a valid column index.</exception>
/// <exception cref="InvalidOperationException">The column to be predicted contains values other than 0 and 1.</exception>
/// <exception cref="InsufficientDataException">There are not more rows in the sample than columns.</exception>
/// <exception cref="DivideByZeroException">The Hessian at the likelihood maximum is not positive definite, so no covariance estimate exists.</exception>
public FitResult LogisticLinearRegression (int outputIndex) {

    if ((outputIndex < 0) || (outputIndex >= this.Dimension)) throw new ArgumentOutOfRangeException(nameof(outputIndex));
    if (this.Count <= this.Dimension) throw new InsufficientDataException();

    // Define the log likelihood as a function of the parameter set.
    // For each row, z = a . x, with the slot of the output column re-used as the intercept term;
    // the likelihood contribution is -log(1 + e^z) for y = 0 and -log(1 + e^-z) for y = 1.
    Func<IList<double>, double> logLikelihood = (IList<double> a) => {
        double L = 0.0;
        for (int k = 0; k < this.Count; k++) {
            double z = 0.0;
            for (int i = 0; i < this.storage.Length; i++) {
                if (i == outputIndex) {
                    // The output column's parameter slot acts as the intercept.
                    z += a[i];
                } else {
                    z += a[i] * this.storage[i][k];
                }
            }
            double ez = Math.Exp(z);
            double y = this.storage[outputIndex][k];
            if (y == 0.0) {
                // Use LogOnePlus for accuracy when the argument is small,
                // consistent with MultiLinearLogisticRegressionResult.
                L -= MoreMath.LogOnePlus(ez);
            } else if (y == 1.0) {
                L -= MoreMath.LogOnePlus(1.0 / ez);
            } else {
                // The predicted column must be a 0/1 indicator.
                throw new InvalidOperationException();
            }
        }
        return (L);
    };

    // Start from the all-zero parameter vector. A data-derived starting point
    // (e.g. covariance-based slopes) would converge faster, but zero is always admissible.
    double[] start = new double[this.Dimension];

    // Maximize the likelihood; the inverse of the Hessian at the maximum estimates
    // the covariance matrix of the parameters. A failed Cholesky decomposition
    // indicates a degenerate fit, which we surface explicitly instead of letting
    // a null-reference fault occur.
    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);
    CholeskyDecomposition CD = maximum.HessianMatrix.CholeskyDecomposition();
    if (CD == null) throw new DivideByZeroException();
    FitResult result = new FitResult(maximum.Location, CD.Inverse(), null);
    return (result);

}
public void EasomLocal () {

    // The Easom function: a narrow Gaussian peak of height 1 at (pi, pi),
    // modulated by cosines that create many surrounding local extrema.
    Func<IList<double>, double> easom = (IList<double> x) =>
        Math.Cos(x[0]) * Math.Cos(x[1]) * Math.Exp(-(MoreMath.Sqr(x[0] - Math.PI) + MoreMath.Sqr(x[1] - Math.PI)));

    // Because the cosines introduce many local extrema, the search must begin near the true maximum.
    ColumnVector initialPoint = new ColumnVector(1.5, 2.0);

    MultiExtremum located = MultiFunctionMath.FindLocalMaximum(easom, initialPoint);
    Console.WriteLine(located.Value);

    // The maximum should be 1 at (pi, pi), within the precision the optimizer reports.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(located.Value, 1.0, new EvaluationSettings() { AbsolutePrecision = 2.0 * located.Precision }));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(located.Location, new ColumnVector(Math.PI, Math.PI), new EvaluationSettings() { AbsolutePrecision = 2.0 * Math.Sqrt(located.Precision) }));

}
internal static DistributionFitResult<ContinuousDistribution> MaximumLikelihoodFit (IReadOnlyList<double> sample, Func<IReadOnlyList<double>, ContinuousDistribution> factory, IReadOnlyList<double> start, IReadOnlyList<string> names) {

    Debug.Assert(sample != null);
    Debug.Assert(factory != null);
    Debug.Assert(start != null);
    Debug.Assert(names != null);
    Debug.Assert(start.Count == names.Count);

    // The log likelihood of the sample under the distribution built from parameter vector a.
    // A zero density at any sample point makes the likelihood -infinity, which we signal
    // via an exception rather than returning a non-finite value to the optimizer.
    Func<IReadOnlyList<double>, double> logL = (IReadOnlyList<double> a) => {
        ContinuousDistribution candidate = factory(a);
        double sum = 0.0;
        foreach (double x in sample) {
            double p = candidate.ProbabilityDensity(x);
            if (p == 0.0) throw new InvalidOperationException();
            sum += Math.Log(p);
        }
        return (sum);
    };

    // Find the likelihood-maximizing parameter set.
    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logL, start);
    ColumnVector parameterEstimates = maximum.Location;

    // The inverse of the Hessian at the maximum estimates the parameter covariance matrix.
    // A Hessian without a Cholesky decomposition (not positive definite) means the fit is degenerate.
    CholeskyDecomposition cholesky = maximum.HessianMatrix.CholeskyDecomposition();
    if (cholesky == null) throw new DivideByZeroException();
    SymmetricMatrix covariance = cholesky.Inverse();

    // Quantify goodness-of-fit with a Kolmogorov-Smirnov test of the sample against the fitted distribution.
    ContinuousDistribution fitted = factory(maximum.Location);
    TestResult test = sample.KolmogorovSmirnovTest(fitted);

    return (new ContinuousDistributionFitResult(names, parameterEstimates, covariance, fitted, test));

}
internal MultiLinearLogisticRegressionResult (IReadOnlyList<bool> yColumn, IReadOnlyList<IReadOnlyList<double>> xColumns, IReadOnlyList<string> xNames) {

    Debug.Assert(yColumn != null);
    Debug.Assert(xColumns != null);
    Debug.Assert(xNames != null);
    Debug.Assert(xColumns.Count == xNames.Count);

    int rows = yColumn.Count;
    int parameters = xColumns.Count;
    if (rows <= parameters) throw new InsufficientDataException();

    // Locate the intercept column, which the caller marks with a null column reference,
    // and verify that every real column has exactly one entry per row.
    interceptIndex = -1;
    for (int c = 0; c < parameters; c++) {
        IReadOnlyList<double> column = xColumns[c];
        if (column == null) {
            Debug.Assert(interceptIndex < 0);
            Debug.Assert(xNames[c] == "Intercept");
            interceptIndex = c;
        } else {
            if (column.Count != rows) throw new DimensionMismatchException();
        }
    }
    Debug.Assert(interceptIndex >= 0);

    // The log likelihood as a function of the parameter vector a. For each row,
    // t = a . x (the intercept slot contributes a[i] directly); the contribution is
    // -log(1 + e^-t) for a true outcome and -log(1 + e^t) for a false one.
    Func<IReadOnlyList<double>, double> logLikelihood = (IReadOnlyList<double> a) => {
        Debug.Assert(a != null);
        Debug.Assert(a.Count == parameters);
        double total = 0.0;
        for (int k = 0; k < rows; k++) {
            double t = 0.0;
            for (int i = 0; i < parameters; i++) {
                t += (i == interceptIndex) ? a[i] : a[i] * xColumns[i][k];
            }
            double et = Math.Exp(t);
            total -= yColumn[k] ? MoreMath.LogOnePlus(1.0 / et) : MoreMath.LogOnePlus(et);
        }
        return (total);
    };

    // We need a better starting value.
    double[] start = new double[parameters];

    // Search out the likelihood-maximizing parameter set.
    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);
    b = maximum.Location;

    // The inverse Hessian at the maximum estimates the parameter covariance matrix;
    // failure of the Cholesky decomposition indicates a degenerate fit.
    CholeskyDecomposition CD = maximum.HessianMatrix.CholeskyDecomposition();
    if (CD == null) throw new DivideByZeroException();
    C = CD.Inverse();

    names = xNames;

}