/// <summary>
/// Fits a parameterized function to paired data by minimizing the sum of squared residuals,
/// and records the best-fit parameters, their covariance, and the residuals.
/// </summary>
/// <param name="x">The independent-variable values.</param>
/// <param name="y">The dependent-variable values; must have the same count as <paramref name="x"/>.</param>
/// <param name="function">The model, called as function(parameters, x).</param>
/// <param name="start">The initial guess for the parameters.</param>
/// <param name="names">The parameter names, one per entry of <paramref name="start"/>.</param>
/// <exception cref="InsufficientDataException">There are not more data points than parameters.</exception>
/// <exception cref="DivideByZeroException">The Hessian at the minimum has no Cholesky decomposition.</exception>
internal NonlinearRegressionResult(
    IReadOnlyList<double> x,
    IReadOnlyList<double> y,
    Func<IReadOnlyList<double>, double, double> function,
    IReadOnlyList<double> start,
    IReadOnlyList<string> names) {

    Debug.Assert(x != null);
    Debug.Assert(y != null);
    Debug.Assert(function != null);
    Debug.Assert(start != null);
    Debug.Assert(names != null);
    Debug.Assert(x.Count == y.Count);
    Debug.Assert(start.Count > 0);
    Debug.Assert(names.Count == start.Count);

    int pointCount = x.Count;
    int parameterCount = start.Count;
    if (pointCount <= parameterCount) {
        throw new InsufficientDataException();
    }

    // The objective: sum of squared deviations between data and model.
    Func<IReadOnlyList<double>, double> sumOfSquares = (IReadOnlyList<double> parameters) => {
        double total = 0.0;
        for (int i = 0; i < pointCount; i++) {
            double deviation = y[i] - function(parameters, x[i]);
            total += deviation * deviation;
        }
        return total;
    };
    MultiExtremum minimum = MultiFunctionMath.FindLocalMinimum(sumOfSquares, start);

    CholeskyDecomposition decomposition = minimum.HessianMatrix.CholeskyDecomposition();
    if (decomposition == null) {
        throw new DivideByZeroException();
    }

    b = minimum.Location;

    // Covariance = 2 * (minimum sum of squares) / (degrees of freedom) * inverse Hessian.
    C = decomposition.Inverse();
    C = (2.0 * minimum.Value / (pointCount - parameterCount)) * C;

    // Re-evaluate the residuals at the best-fit parameters.
    double squaredTotal = 0.0;
    List<double> deviations = new List<double>(pointCount);
    for (int i = 0; i < pointCount; i++) {
        double deviation = y[i] - function(b, x[i]);
        squaredTotal += deviation * deviation;
        deviations.Add(deviation);
    }
    sumOfSquaredResiduals = squaredTotal;
    residuals = deviations;

    this.names = names;
    this.function = function;
}
// Verifies, for Hilbert matrices of dimension 1 through 4, that a Cholesky decomposition
// is obtained, reports the right dimension, and yields an inverse H^{-1} with H * H^{-1} = I.
public void SymmetricMatrixDecomposition() {
    for (int dimension = 1; dimension <= 4; dimension++) {
        SymmetricMatrix hilbert = TestUtilities.CreateSymmetricHilbertMatrix(dimension);

        CholeskyDecomposition decomposition = hilbert.CholeskyDecomposition();
        Assert.IsTrue(decomposition != null, String.Format("d={0} not positive definite", dimension));
        Assert.IsTrue(decomposition.Dimension == dimension);

        SymmetricMatrix inverse = decomposition.Inverse();
        SquareMatrix identity = TestUtilities.CreateSquareUnitMatrix(dimension);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(hilbert * inverse, identity));
    }
}
/// <summary>
/// Fits the data to an arbitrary parameterized function.
/// </summary>
/// <param name="function">The fit function.</param>
/// <param name="start">An initial guess at the parameters.</param>
/// <returns>A fit result containing the best-fitting function parameters
/// and a χ<sup>2</sup> test of the quality of the fit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="function"/> or <paramref name="start"/> are <see langword="null"/>.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
/// one or more parameters, or that two or more parameters are linearly dependent.</exception>
public FitResult FitToFunction(Func<double[], T, double> function, double[] start) {
    if (function == null) {
        throw new ArgumentNullException(nameof(function));
    }
    if (start == null) {
        throw new ArgumentNullException(nameof(start));
    }

    // A fit needs at least as many data points as parameters.
    if (this.Count < start.Length) {
        throw new InsufficientDataException();
    }

    // Build the chi^2 metric for this data and model, and minimize it over the parameters.
    FitMetric<T> metric = new FitMetric<T>(this, function);
    Func<double[], double> objective = new Func<double[], double>(metric.Evaluate);
    SpaceExtremum minimum = FunctionMath.FindMinimum(objective, start);

    // The covariance matrix is the inverse of half the curvature at the minimum.
    SymmetricMatrix halfCurvature = 0.5 * minimum.Curvature();
    CholeskyDecomposition decomposition = halfCurvature.CholeskyDecomposition();
    if (decomposition == null) {
        // Not expected at a true minimum.
        throw new DivideByZeroException();
    }
    SymmetricMatrix covariance = decomposition.Inverse();

    // Right-tailed chi^2 test with (data points - parameters) degrees of freedom.
    TestResult test = new TestResult(
        "ChiSquare",
        minimum.Value,
        TestType.RightTailed,
        new ChiSquaredDistribution(this.Count - minimum.Dimension));

    return new FitResult(minimum.Location(), covariance, test);
}
/// <summary>
/// Finds the parameterized function that best fits the data.
/// </summary>
/// <param name="f">The parameterized function.</param>
/// <param name="start">An initial guess for the parameters.</param>
/// <returns>The fit result.</returns>
/// <remarks>
/// <para>
/// In the returned <see cref="FitResult"/>, the parameters appear in the same order as in
/// the supplied fit function and initial guess vector. No goodness-of-fit test is returned.
/// </para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="f"/> or <paramref name="start"/> is null.</exception>
/// <exception cref="InsufficientDataException">There are not more data points than fit parameters.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
/// one or more parameters, or that two or more parameters are linearly dependent.</exception>
public FitResult NonlinearRegression(Func<IList<double>, double, double> f, IList<double> start) {
    if (f == null) {
        throw new ArgumentNullException(nameof(f));
    }
    if (start == null) {
        throw new ArgumentNullException(nameof(start));
    }

    int pointCount = this.Count;
    int parameterCount = start.Count;
    if (pointCount <= parameterCount) {
        throw new InsufficientDataException();
    }

    // Objective: the sum of squared residuals as a function of the parameters.
    Func<IList<double>, double> sumOfSquares = (IList<double> parameters) => {
        double total = 0.0;
        for (int i = 0; i < pointCount; i++) {
            double residual = yData[i] - f(parameters, xData[i]);
            total += residual * residual;
        }
        return total;
    };
    MultiExtremum minimum = MultiFunctionMath.FindLocalMinimum(sumOfSquares, start);

    CholeskyDecomposition decomposition = minimum.HessianMatrix.CholeskyDecomposition();
    if (decomposition == null) {
        throw new DivideByZeroException();
    }

    // Parameter covariance: inverse Hessian scaled by 2 * (minimum sum of squares) / (degrees of freedom).
    SymmetricMatrix covariance = decomposition.Inverse();
    covariance = (2.0 * minimum.Value / (pointCount - parameterCount)) * covariance;

    return new FitResult(minimum.Location, covariance, null);
}
// Exercises the Cholesky decomposition of Hilbert matrices of dimension 1 through 4:
// the decomposition exists, its square-root factor reproduces the matrix, and its inverse is correct.
public void HilbertMatrixCholeskyDecomposition() {
    for (int dimension = 1; dimension <= 4; dimension++) {
        SymmetricMatrix hilbert = TestUtilities.CreateSymmetricHilbertMatrix(dimension);

        // The decomposition exists and has the expected size.
        CholeskyDecomposition decomposition = hilbert.CholeskyDecomposition();
        Assert.IsTrue(decomposition != null);
        Assert.IsTrue(decomposition.Dimension == dimension);

        // The square-root factor S satisfies S * S^T = H.
        SquareMatrix root = decomposition.SquareRootMatrix();
        Assert.IsTrue(TestUtilities.IsNearlyEqual(root * root.Transpose, hilbert));

        // The inverse satisfies H * H^{-1} = I.
        SymmetricMatrix inverse = decomposition.Inverse();
        Assert.IsTrue(TestUtilities.IsNearlyEqual(hilbert * inverse, UnitMatrix.OfDimension(dimension)));
    }
}
/// <summary>
/// Fits a parameterized continuous distribution to a sample by maximizing the log likelihood.
/// </summary>
/// <param name="sample">The sample values.</param>
/// <param name="factory">Creates the distribution corresponding to a given parameter set.</param>
/// <param name="start">The initial guess for the parameters.</param>
/// <param name="names">The parameter names, one per entry of <paramref name="start"/>.</param>
/// <returns>The fit result, including a Kolmogorov-Smirnov test of the sample against the fitted distribution.</returns>
/// <exception cref="InvalidOperationException">A candidate distribution assigns zero probability density to a sample value.</exception>
/// <exception cref="DivideByZeroException">The Hessian reported at the maximum has no Cholesky decomposition.</exception>
internal static DistributionFitResult<ContinuousDistribution> MaximumLikelihoodFit(
    IReadOnlyList<double> sample,
    Func<IReadOnlyList<double>, ContinuousDistribution> factory,
    IReadOnlyList<double> start,
    IReadOnlyList<string> names) {

    Debug.Assert(sample != null);
    Debug.Assert(factory != null);
    Debug.Assert(start != null);
    Debug.Assert(names != null);
    Debug.Assert(start.Count == names.Count);

    // Log likelihood of the sample under the distribution produced by a parameter set.
    Func<IReadOnlyList<double>, double> logLikelihood = (IReadOnlyList<double> parameters) => {
        ContinuousDistribution candidate = factory(parameters);
        double total = 0.0;
        foreach (double observation in sample) {
            double density = candidate.ProbabilityDensity(observation);
            if (density == 0.0) {
                // The log likelihood would be -infinity for this parameter set.
                throw new InvalidOperationException();
            }
            total += Math.Log(density);
        }
        return total;
    };

    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);
    ColumnVector best = maximum.Location;

    // Invert the Hessian reported at the maximum to obtain the parameter covariance.
    CholeskyDecomposition decomposition = maximum.HessianMatrix.CholeskyDecomposition();
    if (decomposition == null) {
        throw new DivideByZeroException();
    }
    SymmetricMatrix covariance = decomposition.Inverse();

    // Goodness of fit: Kolmogorov-Smirnov test against the fitted distribution.
    ContinuousDistribution distribution = factory(maximum.Location);
    TestResult test = sample.KolmogorovSmirnovTest(distribution);

    return new ContinuousDistributionFitResult(names, best, covariance, distribution, test);
}
/// <summary>
/// Fits the data to an arbitrary parameterized function.
/// </summary>
/// <param name="function">The fit function.</param>
/// <param name="start">An initial guess at the parameters.</param>
/// <returns>A fit result containing the best-fitting function parameters
/// and a χ<sup>2</sup> test of the quality of the fit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="function"/> or <paramref name="start"/> are <see langword="null"/>.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
/// one or more parameters, or that two or more parameters are linearly dependent.</exception>
public UncertainMeasurementFitResult FitToFunction(Func<double[], T, double> function, double[] start) {
    if (function == null) {
        throw new ArgumentNullException(nameof(function));
    }
    if (start == null) {
        throw new ArgumentNullException(nameof(start));
    }

    // A fit needs at least as many data points as parameters.
    if (this.Count < start.Length) {
        throw new InsufficientDataException();
    }

    // Build the chi^2 metric for this data and model, and minimize it over the parameters.
    FitMetric<T> metric = new FitMetric<T>(this, function);
    Func<double[], double> objective = new Func<double[], double>(metric.Evaluate);
    SpaceExtremum minimum = FunctionMath.FindMinimum(objective, start);

    // The covariance matrix is the inverse of half the curvature at the minimum.
    SymmetricMatrix halfCurvature = 0.5 * minimum.Curvature();
    CholeskyDecomposition decomposition = halfCurvature.CholeskyDecomposition();
    if (decomposition == null) {
        // Not expected at a true minimum.
        throw new DivideByZeroException();
    }
    SymmetricMatrix covariance = decomposition.Inverse();

    // Right-tailed chi^2 test with (data points - parameters) degrees of freedom.
    TestResult test = new TestResult(
        "χ²",
        minimum.Value,
        new ChiSquaredDistribution(this.Count - minimum.Dimension),
        TestType.RightTailed);

    // Package the parameters under generated names, together with their covariance.
    ColumnVector best = new ColumnVector(minimum.Location(), 0, 1, start.Length, true);
    ParameterCollection parameters = new ParameterCollection(NumberNames(start.Length), best, covariance);

    return new UncertainMeasurementFitResult(parameters, test);
}
// TODO: a goodness-of-fit measurement is still needed.

/// <summary>
/// Fits the two-parameter logistic model p = sigma(a + b x) to paired (x, y) data by maximum likelihood,
/// and stores the best-fit parameters and their covariance matrix.
/// </summary>
/// <param name="x">The independent-variable values.</param>
/// <param name="y">The boolean outcomes; must have the same count as <paramref name="x"/>.</param>
/// <exception cref="InsufficientDataException">There are fewer than three data points.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix has no Cholesky decomposition.</exception>
internal LinearLogisticRegressionResult(IReadOnlyList<double> x, IReadOnlyList<bool> y) {

    Debug.Assert(x != null);
    Debug.Assert(y != null);
    Debug.Assert(x.Count == y.Count);

    int n = x.Count;
    if (n < 3) {
        throw new InsufficientDataException();
    }

    // Model: p_i = \sigma(t_i) with t_i = a + b x_i and \sigma(t) = 1 / (1 + e^{-t}).
    //
    // Log likelihood:
    //   \ln L = \sum_{true i} \ln \sigma(t_i) + \sum_{false i} \ln (1 - \sigma(t_i))
    //
    // Since \sigma'(t) / \sigma(t) = \sigma(-t) and \sigma'(t) / (1 - \sigma(t)) = \sigma(t), the gradient is
    //   \partial L / \partial a = \sum_i \pm \sigma(\mp t_i)
    //   \partial L / \partial b = \sum_i \pm \sigma(\mp t_i) x_i
    // with the upper sign for true outcomes and the lower sign for false outcomes.
    // The likelihood-maximizing (a, b) is the simultaneous zero of these two expressions.
    //
    // Second derivatives, used for the curvature matrix:
    //   \partial^2 L / \partial a^2          = - \sum_i \sigma'(\mp t_i)
    //   \partial^2 L / \partial a \partial b = - \sum_i \sigma'(\mp t_i) x_i
    //   \partial^2 L / \partial b^2          = - \sum_i \sigma'(\mp t_i) x_i^2

    // Initial guess. From the logistic Ansatz p / (1 - p) = e^{\alpha + \beta x} we get
    //   \partial p / \partial x = \beta p (1 - p).
    // Evaluating at the means, with p (1 - p) = var(y) and cov(p, x) = (\partial p / \partial x) var(x):
    //   \beta = cov(y, x) / (var(x) var(y))
    // This gets the sign right but usually not the magnitude, because var(y) = p (1 - p)
    // assumes all y share a fixed p, which they do not. This analysis should be revisited.
    double xMean, yMean, xxSum, yySum, xySum;
    Bivariate.ComputeBivariateMomentsUpToTwo(
        x, y.Select(z => z ? 1.0 : 0.0),
        out n, out xMean, out yMean, out xxSum, out yySum, out xySum);
    double p = yMean;
    double b0 = xySum / xxSum / yySum * n;
    double a0 = Math.Log(p / (1.0 - p)) - b0 * xMean;

    // Gradient of the log likelihood with respect to (a, b).
    Func<IReadOnlyList<double>, IReadOnlyList<double>> gradient = (IReadOnlyList<double> ab) => {
        double dLda = 0.0;
        double dLdb = 0.0;
        for (int i = 0; i < n; i++) {
            double t = ab[0] + ab[1] * x[i];
            if (y[i]) {
                double weight = Sigma(-t);
                dLda += weight;
                dLdb += weight * x[i];
            } else {
                double weight = Sigma(t);
                dLda -= weight;
                dLdb -= weight * x[i];
            }
        }
        return new double[] { dLda, dLdb };
    };
    ColumnVector solution = MultiFunctionMath.FindZero(gradient, new double[] { a0, b0 });

    // Accumulate the curvature matrix from \sigma'(\mp t_i) = e / (1 + e)^2 with e = e^{\pm t_i}.
    SymmetricMatrix curvature = new SymmetricMatrix(2);
    for (int i = 0; i < n; i++) {
        double t = solution[0] + solution[1] * x[i];
        if (y[i]) {
            t = -t;
        }
        double e = Math.Exp(-t);
        double sp = e / MoreMath.Sqr(1.0 + e);
        curvature[0, 0] += sp;
        curvature[0, 1] += sp * x[i];
        curvature[1, 1] += sp * x[i] * x[i];
    }

    // The covariance matrix is the inverse of the curvature matrix.
    CholeskyDecomposition decomposition = curvature.CholeskyDecomposition();
    if (decomposition == null) {
        throw new DivideByZeroException();
    }

    best = solution;
    covariance = decomposition.Inverse();
}
/// <summary>
/// Finds the Beta distribution that best fits the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The best fit parameters.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is <see langword="null"/>.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
/// <exception cref="InvalidOperationException">Not all the entries in <paramref name="sample" /> lie between zero and one.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix of the log likelihood has no Cholesky decomposition.</exception>
public static BetaFitResult FitToBeta(this IReadOnlyList<double> sample) {

    if (sample == null) {
        throw new ArgumentNullException(nameof(sample));
    }
    if (sample.Count < 3) {
        throw new InsufficientDataException();
    }

    // Maximum likelihood calculation:
    //   \log L = \sum_i \left[ (\alpha-1) \log x_i + (\beta-1) \log (1-x_i) - \log B(\alpha,\beta) \right]
    // Using \frac{\partial \log B(a,b)}{\partial a} = \psi(a) - \psi(a+b), we have
    //   \frac{\partial \log L}{\partial \alpha} = \sum_i \log x_i - N \left[ \psi(\alpha) - \psi(\alpha+\beta) \right]
    //   \frac{\partial \log L}{\partial \beta} = \sum_i \log (1-x_i) - N \left[ \psi(\beta) - \psi(\alpha+\beta) \right]
    // Setting these equal to zero gives the equations for \alpha, \beta:
    //   \psi(\alpha) - \psi(\alpha+\beta) = <\log x>
    //   \psi(\beta) - \psi(\alpha+\beta) = <\log (1-x)>

    // Compute the mean log of x and (1-x); these are the logs of the geometric means.
    double ga = 0.0;
    double gb = 0.0;
    foreach (double value in sample) {
        if ((value <= 0.0) || (value >= 1.0)) {
            throw new InvalidOperationException();
        }
        ga += Math.Log(value);
        gb += Math.Log(1.0 - value);
    }
    ga /= sample.Count;
    gb /= sample.Count;

    // The pair of functions whose simultaneous zero is the maximum-likelihood (\alpha, \beta).
    Func<IReadOnlyList<double>, IReadOnlyList<double>> f = delegate (IReadOnlyList<double> x) {
        double pab = AdvancedMath.Psi(x[0] + x[1]);
        return new double[] {
            AdvancedMath.Psi(x[0]) - pab - ga,
            AdvancedMath.Psi(x[1]) - pab - gb
        };
    };

    // Guess initial values using the method of moments:
    //   M1 = \frac{\alpha}{\alpha+\beta}   C2 = \frac{\alpha\beta}{(\alpha+\beta)^2 (\alpha+\beta+1)}
    // implies
    //   \alpha = M1 \left( \frac{M1 (1-M1)}{C2} - 1 \right)
    //   \beta = (1 - M1) \left( \frac{M1 (1-M1)}{C2} - 1 \right)
    int n;
    double m, v;
    ComputeMomentsUpToSecond(sample, out n, out m, out v);
    v = v / n;

    double mm = 1.0 - m;
    double q = m * mm / v - 1.0;
    double[] x0 = new double[] { m * q, mm * q };

    // Find the parameter values that zero the two equations.
    ColumnVector ab = MultiFunctionMath.FindZero(f, x0);
    double a = ab[0];
    double b = ab[1];

    // Take more derivatives of \log L to get the curvature matrix:
    //   \frac{\partial^2 \log L}{\partial\alpha^2} = - N \left[ \psi'(\alpha) - \psi'(\alpha+\beta) \right]
    //   \frac{\partial^2 \log L}{\partial\beta^2} = - N \left[ \psi'(\beta) - \psi'(\alpha+\beta) \right]
    //   \frac{\partial^2 \log L}{\partial \alpha \partial \beta} = + N \psi'(\alpha+\beta)
    // The curvature matrix is the negative of this Hessian, so its off-diagonal element is
    // - N \psi'(\alpha+\beta). The covariance matrix is the inverse of the curvature matrix.
    double trigammaA = AdvancedMath.Psi(1, a);
    double trigammaB = AdvancedMath.Psi(1, b);
    double trigammaAB = AdvancedMath.Psi(1, a + b);
    SymmetricMatrix C = new SymmetricMatrix(2);
    C[0, 0] = sample.Count * (trigammaA - trigammaAB);
    C[1, 1] = sample.Count * (trigammaB - trigammaAB);
    C[0, 1] = -sample.Count * trigammaAB;
    CholeskyDecomposition CD = C.CholeskyDecomposition();
    if (CD == null) {
        throw new DivideByZeroException();
    }
    C = CD.Inverse();

    // Do a KS test on the result.
    BetaDistribution distribution = new BetaDistribution(a, b);
    TestResult test = sample.KolmogorovSmirnovTest(distribution);

    return new BetaFitResult(ab, C, distribution, test);
}
/// <summary>
/// Finds the Gamma distribution that best fits the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The best fit parameters.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
public static GammaFitResult FitToGamma(this IReadOnlyList<double> sample) {

    if (sample == null) {
        throw new ArgumentNullException(nameof(sample));
    }
    if (sample.Count < 3) {
        throw new InsufficientDataException();
    }

    // The log likelihood of a sample given shape k and scale s is
    //   \log L = (k-1) \sum_i \log x_i - \frac{1}{s} \sum_i x_i - N \log \Gamma(k) - N k \log s
    // Differentiating,
    //   \frac{\partial \log L}{\partial s} = \frac{1}{s^2} \sum_i x_i - \frac{N k}{s}
    //   \frac{\partial \log L}{\partial k} = \sum_i \log x_i - N \psi(k) - N \log s
    // Setting both to zero gives
    //   k s = <x>
    //   \psi(k) + \log s = <\log x>
    // and substituting the first into the second leaves one equation for k:
    //   \log k - \psi(k) = \log <x> - <\log x>
    // The right-hand side is a constant of the sample. Since \log k > \psi(k) for all k, it must be
    // positive; the two sides approach each other for large k, so a smaller constant means a larger k.

    int count;
    double mean, sumOfSquaredDeviations;
    ComputeMomentsUpToSecond(sample, out count, out mean, out sumOfSquaredDeviations);
    double variance = sumOfSquaredDeviations / count;

    double logSum = 0.0;
    foreach (double value in sample) {
        if (value <= 0.0) {
            throw new InvalidOperationException();
        }
        logSum += Math.Log(value);
    }
    double target = Math.Log(mean) - logSum / count;

    // Initial guess for k from the method of moments: \frac{\mu^2}{\sigma^2} = k
    double shapeGuess = MoreMath.Sqr(mean) / variance;

    // Since 1/(2k) < \log(k) - \psi(k) < 1/k, a bracket could be derived; that might be
    // better, to keep the solver from wandering into k < 0 territory.
    double shape = FunctionMath.FindZero(k => (Math.Log(k) - AdvancedMath.Psi(k) - target), shapeGuess);
    double scale = mean / shape;

    // Curvature of the log likelihood:
    //   \frac{\partial^2 \log L}{\partial s^2} = -\frac{2}{s^3} \sum_i x_i + \frac{Nk}{s^2} = - \frac{Nk}{s^2}
    //   \frac{\partial^2 \log L}{\partial k \partial s} = - \frac{N}{s}
    //   \frac{\partial^2 \log L}{\partial k^2} = - N \psi'(k)
    // The negated second derivatives form the curvature matrix; its inverse is the covariance matrix.
    SymmetricMatrix curvature = new SymmetricMatrix(2);
    curvature[0, 0] = count * AdvancedMath.Psi(1, shape);
    curvature[0, 1] = count / scale;
    curvature[1, 1] = count * shape / MoreMath.Sqr(scale);
    CholeskyDecomposition decomposition = curvature.CholeskyDecomposition();
    if (decomposition == null) {
        throw new DivideByZeroException();
    }
    SymmetricMatrix covariance = decomposition.Inverse();

    // Kolmogorov-Smirnov test for goodness of fit.
    GammaDistribution distribution = new GammaDistribution(shape, scale);
    TestResult test = sample.KolmogorovSmirnovTest(distribution);

    return new GammaFitResult(shape, scale, covariance, distribution, test);
}
// The internal linear regression routine, which assumes outputIndex is a valid column index.
// Fits the column at outputIndex as a linear function of the other columns plus an intercept.
// In the returned parameter vector, entry outputIndex is the intercept and every other entry
// is the coefficient of the corresponding column. The result carries an F-test of the fit.
// Throws InsufficientDataException unless there are more rows than columns, and
// DivideByZeroException when the design matrix has no Cholesky decomposition.
private FitResult LinearRegression_Internal(int outputIndex) {

    // To do a fit we need strictly more data than parameters; with Count == Dimension the
    // residual degrees of freedom below would be zero and the variance estimate would divide by zero.
    if (Count <= Dimension) {
        throw new InsufficientDataException();
    }

    // Construct the design matrix. The output column's slot plays the role of the constant
    // (intercept) term, so sums involving it reduce to a count or a column sum (Mean * Count).
    SymmetricMatrix D = new SymmetricMatrix(Dimension);
    for (int i = 0; i < Dimension; i++) {
        for (int j = 0; j <= i; j++) {
            double entry;
            if (i == outputIndex) {
                entry = (j == outputIndex) ? Count : storage[j].Mean * Count;
            } else if (j == outputIndex) {
                entry = storage[i].Mean * Count;
            } else {
                entry = 0.0;
                for (int k = 0; k < Count; k++) {
                    entry += storage[i][k] * storage[j][k];
                }
            }
            D[i, j] = entry;
        }
    }

    // Construct the right hand side: the sum of the output values, or of output times input.
    ColumnVector b = new ColumnVector(Dimension);
    for (int i = 0; i < Dimension; i++) {
        if (i == outputIndex) {
            b[i] = storage[i].Mean * Count;
        } else {
            double bi = 0.0;
            for (int k = 0; k < Count; k++) {
                bi += storage[outputIndex][k] * storage[i][k];
            }
            b[i] = bi;
        }
    }

    // Solve the system for the linear model parameters. A null decomposition means the design
    // matrix is not positive definite (e.g. linearly dependent columns); report it the same way
    // the other fit routines do instead of failing on a null reference.
    CholeskyDecomposition CD = D.CholeskyDecomposition();
    if (CD == null) {
        throw new DivideByZeroException();
    }
    ColumnVector parameters = CD.Solve(b);

    // Total sum of squares, with dof = # points - 1 (minus one for the variance-minimizing mean).
    double totalSumOfSquares = storage[outputIndex].Variance * Count;

    // Remaining unexplained sum of squares, with dof = # points - # parameters.
    double unexplainedSumOfSquares = 0.0;
    for (int r = 0; r < Count; r++) {
        double y = 0.0;
        for (int c = 0; c < Dimension; c++) {
            if (c == outputIndex) {
                y += parameters[c];
            } else {
                y += parameters[c] * storage[c][r];
            }
        }
        unexplainedSumOfSquares += MoreMath.Sqr(y - storage[outputIndex][r]);
    }
    int unexplainedDegreesOfFreedom = Count - Dimension;
    double unexplainedVariance = unexplainedSumOfSquares / unexplainedDegreesOfFreedom;

    // Explained sum of squares, with dof = # parameters - 1.
    double explainedSumOfSquares = totalSumOfSquares - unexplainedSumOfSquares;
    int explainedDegreesOfFreedom = Dimension - 1;
    double explainedVariance = explainedSumOfSquares / explainedDegreesOfFreedom;

    // Compute the F statistic from the two variances.
    double F = explainedVariance / unexplainedVariance;
    Distribution fDistribution = new FisherDistribution(explainedDegreesOfFreedom, unexplainedDegreesOfFreedom);

    // Parameter covariance: residual variance times the inverse design matrix.
    SymmetricMatrix covariance = unexplainedVariance * CD.Inverse();

    return new FitResult(parameters, covariance, new TestResult("F", F, TestType.RightTailed, fDistribution));
}
// Experimental routine: draws howMany pairs of standard-normal values, shifts and thresholds
// them into 0/1 indicators, whitens the indicator matrix with the Cholesky factor of its own
// covariance, recolors it with the Cholesky factor of a target covariance (unit variances,
// 0.7 off-diagonal), and thresholds the result again. Nothing is returned; the intermediate
// covariances are only held in locals.
// NOTE(review): presumably these locals are meant for inspection in a debugger - confirm.
public static void GenerateValues2(int howMany) {
    var distribution = new NormalDistribution(0, Math.Sqrt(1));
    var randomValues = distribution.Generate(howMany * 2);

    var meanA = 0.7;
    var meanB = 0.2;

    // Target covariance for the recoloring step.
    var sigma = new double[2, 2];
    sigma[0, 0] = 1;
    sigma[0, 1] = 0.7;
    sigma[1, 0] = 0.7;
    sigma[1, 1] = 1;

    // Each row consumes two consecutive random values, so all howMany rows are filled from
    // the 2 * howMany values drawn above. (The loop previously stopped after howMany values,
    // leaving the second half of the rows at their default zeros.)
    var randomValuesMatrix = new double[howMany, 2];
    for (int row = 0; row < howMany; row++) {
        double shiftedA = randomValues.ElementAt(2 * row) - meanA;
        double shiftedB = randomValues.ElementAt(2 * row + 1) - meanB;
        randomValuesMatrix[row, 0] = shiftedA > 0 ? 1 : 0;
        randomValuesMatrix[row, 1] = shiftedB > 0 ? 1 : 0;
    }

    // Whiten with the inverse of the transposed Cholesky factor of the sample covariance,
    // then recolor with the transposed Cholesky factor of the target covariance.
    var randomValuesMatrixCov = randomValuesMatrix.Covariance();
    var cholCovX = new CholeskyDecomposition(randomValuesMatrixCov).LeftTriangularFactor.Transpose();
    var invCholCovX = cholCovX.Inverse();
    var dottedInverse = randomValuesMatrix.Dot(invCholCovX);
    var result = dottedInverse.Dot(new CholeskyDecomposition(sigma).LeftTriangularFactor.Transpose());
    var resultSigma = result.Covariance();

    // Threshold the recolored values back to 0/1 indicators.
    for (int row = 0; row < howMany; row++) {
        result[row, 0] = result[row, 0] > 0 ? 1 : 0;
        result[row, 1] = result[row, 1] > 0 ? 1 : 0;
    }
    var booleansigma = result.Covariance();
}
/// <summary>
/// Fits a multi-linear logistic model to a boolean outcome column by maximizing the log likelihood,
/// and stores the best-fit parameters, their covariance matrix, and the parameter names.
/// </summary>
/// <param name="yColumn">The boolean outcomes.</param>
/// <param name="xColumns">The input columns; a null entry marks the position of the intercept.</param>
/// <param name="xNames">The parameter names, one per entry of <paramref name="xColumns"/>.</param>
/// <exception cref="InsufficientDataException">There are not more outcomes than parameters.</exception>
/// <exception cref="DimensionMismatchException">An input column's length differs from the number of outcomes.</exception>
/// <exception cref="DivideByZeroException">The Hessian reported at the maximum has no Cholesky decomposition.</exception>
internal MultiLinearLogisticRegressionResult(
    IReadOnlyList<bool> yColumn,
    IReadOnlyList<IReadOnlyList<double>> xColumns,
    IReadOnlyList<string> xNames) {

    Debug.Assert(yColumn != null);
    Debug.Assert(xColumns != null);
    Debug.Assert(xNames != null);
    Debug.Assert(xColumns.Count == xNames.Count);

    int n = yColumn.Count;
    int m = xColumns.Count;
    if (n <= m) {
        throw new InsufficientDataException();
    }

    // Locate the intercept slot (the single null column) and validate the other columns' lengths.
    interceptIndex = -1;
    for (int c = 0; c < m; c++) {
        IReadOnlyList<double> column = xColumns[c];
        if (column != null) {
            if (column.Count != n) {
                throw new DimensionMismatchException();
            }
            continue;
        }
        Debug.Assert(interceptIndex < 0);
        Debug.Assert(xNames[c] == "Intercept");
        interceptIndex = c;
    }
    Debug.Assert(interceptIndex >= 0);

    // Log likelihood as a function of the parameter set: each row contributes
    // -log(1 + e^{-t}) for a true outcome and -log(1 + e^{t}) for a false one,
    // where t is the linear predictor for that row.
    Func<IReadOnlyList<double>, double> logLikelihood = (IReadOnlyList<double> a) => {
        Debug.Assert(a != null);
        Debug.Assert(a.Count == m);
        double total = 0.0;
        for (int row = 0; row < n; row++) {
            double t = 0.0;
            for (int i = 0; i < m; i++) {
                if (i == interceptIndex) {
                    t += a[i];
                } else {
                    t += a[i] * xColumns[i][row];
                }
            }
            double ez = Math.Exp(t);
            total -= yColumn[row] ? MoreMath.LogOnePlus(1.0 / ez) : MoreMath.LogOnePlus(ez);
        }
        return total;
    };

    // TODO: a better starting value is needed; the search currently starts from all zeros.
    double[] start = new double[m];

    // Search out the likelihood-maximizing parameter set.
    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);
    b = maximum.Location;

    // Invert the Hessian reported at the maximum to obtain the covariance matrix.
    CholeskyDecomposition decomposition = maximum.HessianMatrix.CholeskyDecomposition();
    if (decomposition == null) {
        throw new DivideByZeroException();
    }
    C = decomposition.Inverse();

    names = xNames;
}
/// <summary>
/// Performs a linear logistic regression analysis.
/// </summary>
/// <param name="outputIndex">The index of the column to predict.</param>
/// <returns>A logistic multi-linear model fit. The kth parameter is the slope of the multi-linear model with respect to
/// the kth column, except for k equal to the <paramref name="outputIndex"/>, for which it is the intercept.</returns>
/// <remarks>Logistic linear regression is suited to situations where multiple input variables, either continuous or binary
/// indicators, are used to predict the value of a binary output variable. Like a linear regression, a logistic linear
/// regression tries to find a model that predicts the output variable using a linear combination of input variables.
/// Unlike a simple linear regression, the model does not assume that this linear function predicts the output directly;
/// instead it assumes that this function value is then fed into a logit link function, which maps the real numbers into
/// the interval (0, 1), and interprets the value of this link function as the probability of obtaining success value
/// for the output variable.</remarks>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="outputIndex"/> is not a valid column index.</exception>
/// <exception cref="InvalidOperationException">The column to be predicted contains values other than 0 and 1.</exception>
/// <exception cref="InsufficientDataException">There are not more rows in the sample than columns.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
/// one or more parameters, or that two or more parameters are linearly dependent.</exception>
public FitResult LogisticLinearRegression(int outputIndex) {

    if ((outputIndex < 0) || (outputIndex >= this.Dimension)) {
        throw new ArgumentOutOfRangeException(nameof(outputIndex));
    }
    if (this.Count <= this.Dimension) {
        throw new InsufficientDataException();
    }

    // Log likelihood as a function of the parameter set. The slot at outputIndex holds the
    // intercept; every other slot multiplies the corresponding column's value in the row.
    Func<IList<double>, double> logLikelihood = (IList<double> a) => {
        double total = 0.0;
        for (int row = 0; row < this.Count; row++) {
            double z = 0.0;
            for (int i = 0; i < this.storage.Length; i++) {
                if (i == outputIndex) {
                    z += a[i];
                } else {
                    z += a[i] * this.storage[i][row];
                }
            }
            double ez = Math.Exp(z);
            double outcome = this.storage[outputIndex][row];
            if (outcome == 0.0) {
                total -= Math.Log(1.0 + ez);
            } else if (outcome == 1.0) {
                total -= Math.Log(1.0 + 1.0 / ez);
            } else {
                // The output column must be strictly binary.
                throw new InvalidOperationException();
            }
        }
        return total;
    };

    // Start the search from all-zero parameters.
    double[] start = new double[this.Dimension];

    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);

    // Invert the Hessian reported at the maximum to obtain the parameter covariance.
    CholeskyDecomposition decomposition = maximum.HessianMatrix.CholeskyDecomposition();
    if (decomposition == null) {
        throw new DivideByZeroException();
    }

    return new FitResult(maximum.Location, decomposition.Inverse(), null);
}