internal NonlinearRegressionResult(
            IReadOnlyList <double> x, IReadOnlyList <double> y,
            Func <IReadOnlyList <double>, double, double> function,
            IReadOnlyList <double> start, IReadOnlyList <string> names)
        {
            // Internal entry point: arguments are only sanity-checked in debug builds.
            Debug.Assert(x != null);
            Debug.Assert(y != null);
            Debug.Assert(function != null);
            Debug.Assert(start != null);
            Debug.Assert(names != null);
            Debug.Assert(x.Count == y.Count);
            Debug.Assert(start.Count > 0);
            Debug.Assert(names.Count == start.Count);

            int pointCount     = x.Count;
            int parameterCount = start.Count;

            // The scaling below divides by (points - parameters), so strictly more
            // points than parameters are required.
            if (pointCount <= parameterCount)
            {
                throw new InsufficientDataException();
            }

            // Objective: the sum of squared residuals as a function of the parameter vector.
            Func<IReadOnlyList<double>, double> sumOfSquares = (IReadOnlyList<double> parameters) =>
            {
                double total = 0.0;
                for (int i = 0; i < pointCount; i++)
                {
                    double deviation = y[i] - function(parameters, x[i]);
                    total += deviation * deviation;
                }
                return total;
            };
            MultiExtremum minimum = MultiFunctionMath.FindLocalMinimum(sumOfSquares, start);

            // A null decomposition of the Hessian is reported as a singular fit.
            CholeskyDecomposition decomposition = minimum.HessianMatrix.CholeskyDecomposition();
            if (decomposition == null)
            {
                throw new DivideByZeroException();
            }

            // Best-fit parameters, and the inverse Hessian scaled by
            // 2 * (minimum sum of squares) / (points - parameters).
            b = minimum.Location;
            C = (2.0 * minimum.Value / (pointCount - parameterCount)) * decomposition.Inverse();

            // Record each residual at the best-fit parameters along with their squared total.
            double       squaredTotal = 0.0;
            List<double> fitResiduals = new List<double>(pointCount);
            for (int i = 0; i < pointCount; i++)
            {
                double residual = y[i] - function(b, x[i]);
                squaredTotal += residual * residual;
                fitResiduals.Add(residual);
            }
            sumOfSquaredResiduals = squaredTotal;
            residuals             = fitResiduals;

            this.names    = names;
            this.function = function;
        }
Example #2
0
        // Checks that Hilbert matrices of dimension 1 through 4 have a Cholesky decomposition
        // of the right dimension, and that the inverse it produces really inverts the matrix.
        public void SymmetricMatrixDecomposition()
        {
            for (int dimension = 1; dimension <= 4; dimension++)
            {
                SymmetricMatrix hilbert = TestUtilities.CreateSymmetricHilbertMatrix(dimension);

                // A null result would mean the matrix was judged not positive definite.
                CholeskyDecomposition decomposition = hilbert.CholeskyDecomposition();
                Assert.IsTrue(decomposition != null, String.Format("d={0} not positive definite", dimension));
                Assert.IsTrue(decomposition.Dimension == dimension);

                // The matrix times its computed inverse should be (nearly) the unit matrix.
                SymmetricMatrix inverse  = decomposition.Inverse();
                SquareMatrix    identity = TestUtilities.CreateSquareUnitMatrix(dimension);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(hilbert * inverse, identity));
            }
        }
        /// <summary>
        /// Fits the data to an arbitrary parameterized function.
        /// </summary>
        /// <param name="function">The fit function.</param>
        /// <param name="start">An initial guess at the parameter values.</param>
        /// <returns>A fit result holding the best-fitting function parameters, their covariance matrix,
        /// and a &#x3C7;<sup>2</sup> test of the quality of the fit.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="function"/> or <paramref name="start"/> is <see langword="null"/>.</exception>
        /// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
        /// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
        /// one or more parameters, or that two or more parameters are linearly dependent.</exception>
        public FitResult FitToFunction(Func <double[], T, double> function, double[] start)
        {
            if (function == null)
            {
                throw new ArgumentNullException(nameof(function));
            }
            if (start == null)
            {
                throw new ArgumentNullException(nameof(start));
            }

            // A fit with fewer data points than parameters is rejected outright.
            if (this.Count < start.Length)
            {
                throw new InsufficientDataException();
            }

            // Build the chi^2 fit metric for this data set and minimize it over the parameters.
            FitMetric<T>  metric  = new FitMetric<T>(this, function);
            SpaceExtremum minimum = FunctionMath.FindMinimum(new Func<double[], double>(metric.Evaluate), start);

            // The covariance matrix is the inverse of half the curvature at the minimum.
            // The decomposition should not be null at a true minimum; if it is, report a singular fit.
            SymmetricMatrix       halfCurvature = 0.5 * minimum.Curvature();
            CholeskyDecomposition decomposition = halfCurvature.CholeskyDecomposition();
            if (decomposition == null)
            {
                throw new DivideByZeroException();
            }
            SymmetricMatrix covariance = decomposition.Inverse();

            // Goodness of fit: right-tailed chi^2 test with (data points - parameters) degrees of freedom.
            ChiSquaredDistribution testDistribution = new ChiSquaredDistribution(this.Count - minimum.Dimension);
            TestResult             test             = new TestResult("ChiSquare", minimum.Value, TestType.RightTailed, testDistribution);

            return new FitResult(minimum.Location(), covariance, test);
        }
        /// <summary>
        /// Finds the parameterized function that best fits the data in the least-squares sense.
        /// </summary>
        /// <param name="f">The parameterized function.</param>
        /// <param name="start">An initial guess for the parameters.</param>
        /// <returns>The fit result.</returns>
        /// <remarks>
        /// <para>
        /// In the returned <see cref="FitResult"/>, the parameters appear in the same order as in
        /// the supplied fit function and initial guess vector. No goodness-of-fit test is returned.
        /// </para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="f"/> or <paramref name="start"/> is null.</exception>
        /// <exception cref="InsufficientDataException">There are not more data points than fit parameters.</exception>
        /// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
        /// one or more parameters, or that two or more parameters are linearly dependent.</exception>
        public FitResult NonlinearRegression(Func <IList <double>, double, double> f, IList <double> start)
        {
            if (f == null)
            {
                throw new ArgumentNullException(nameof(f));
            }
            if (start == null)
            {
                throw new ArgumentNullException(nameof(start));
            }

            int pointCount     = this.Count;
            int parameterCount = start.Count;

            // The scaling below divides by (points - parameters), so equality is rejected too.
            if (pointCount <= parameterCount)
            {
                throw new InsufficientDataException();
            }

            // Minimize the sum of squared residuals over the parameter vector.
            MultiExtremum extremum = MultiFunctionMath.FindLocalMinimum((IList<double> parameters) =>
            {
                double total = 0.0;
                for (int i = 0; i < pointCount; i++)
                {
                    double residual = yData[i] - f(parameters, xData[i]);
                    total += residual * residual;
                }
                return total;
            }, start);

            // A null decomposition of the Hessian is reported as a singular fit.
            CholeskyDecomposition decomposition = extremum.HessianMatrix.CholeskyDecomposition();
            if (decomposition == null)
            {
                throw new DivideByZeroException();
            }

            // Parameter covariance: the inverse Hessian scaled by
            // 2 * (minimum sum of squares) / (points - parameters).
            double          scale      = 2.0 * extremum.Value / (pointCount - parameterCount);
            SymmetricMatrix covariance = scale * decomposition.Inverse();

            // No goodness-of-fit test is supplied, hence the null third argument.
            return new FitResult(extremum.Location, covariance, null);
        }
Example #5
0
        // For Hilbert matrices of dimension 1 through 4, verifies that the Cholesky decomposition
        // exists, that its square-root factor reproduces the matrix, and that its inverse is correct.
        public void HilbertMatrixCholeskyDecomposition()
        {
            for (int dimension = 1; dimension <= 4; dimension++)
            {
                SymmetricMatrix hilbert = TestUtilities.CreateSymmetricHilbertMatrix(dimension);

                // The decomposition must exist and have the matrix's dimension.
                CholeskyDecomposition decomposition = hilbert.CholeskyDecomposition();
                Assert.IsTrue(decomposition != null);
                Assert.IsTrue(decomposition.Dimension == dimension);

                // The square-root factor times its transpose must give back the original matrix.
                SquareMatrix root = decomposition.SquareRootMatrix();
                Assert.IsTrue(TestUtilities.IsNearlyEqual(root * root.Transpose, hilbert));

                // The matrix times its computed inverse must be the unit matrix.
                SymmetricMatrix inverse = decomposition.Inverse();
                Assert.IsTrue(TestUtilities.IsNearlyEqual(hilbert * inverse, UnitMatrix.OfDimension(dimension)));
            }
        }
Example #6
0
        // Fits a continuous distribution to a sample by maximizing the log likelihood over the
        // parameters accepted by the factory, starting from the given guess. Returns the fitted
        // parameters, the inverse of the Hessian reported at the maximum, the fitted distribution,
        // and a Kolmogorov-Smirnov test of the sample against it.
        // Throws InvalidOperationException if a trial distribution assigns zero density to a sample
        // value, and DivideByZeroException if the Hessian has no Cholesky decomposition.
        internal static DistributionFitResult <ContinuousDistribution> MaximumLikelihoodFit(IReadOnlyList <double> sample, Func <IReadOnlyList <double>, ContinuousDistribution> factory, IReadOnlyList <double> start, IReadOnlyList <string> names)
        {
            Debug.Assert(sample != null);
            Debug.Assert(factory != null);
            Debug.Assert(start != null);
            Debug.Assert(names != null);
            Debug.Assert(start.Count == names.Count);

            // Log likelihood of the sample under the distribution built from the trial parameters.
            Func<IReadOnlyList<double>, double> logLikelihood = (IReadOnlyList<double> parameters) =>
            {
                ContinuousDistribution trial = factory(parameters);
                double total = 0.0;
                foreach (double value in sample)
                {
                    double density = trial.ProbabilityDensity(value);
                    // A zero density would make the log likelihood negative infinity.
                    if (density == 0.0)
                    {
                        throw new InvalidOperationException();
                    }
                    total += Math.Log(density);
                }
                return total;
            };

            // Maximize it.
            MultiExtremum maximum        = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);
            ColumnVector  bestParameters = maximum.Location;

            // NOTE(review): this relies on HessianMatrix being positive definite at a maximum
            // (i.e. reported with the sign already flipped) - confirm against MultiExtremum.
            CholeskyDecomposition decomposition = maximum.HessianMatrix.CholeskyDecomposition();
            if (decomposition == null)
            {
                throw new DivideByZeroException();
            }
            SymmetricMatrix covariance = decomposition.Inverse();

            // Build the fitted distribution and test the sample against it.
            ContinuousDistribution distribution = factory(maximum.Location);
            TestResult             test         = sample.KolmogorovSmirnovTest(distribution);

            return new ContinuousDistributionFitResult(names, bestParameters, covariance, distribution, test);
        }
        /// <summary>
        /// Fits the data to an arbitrary parameterized function.
        /// </summary>
        /// <param name="function">The fit function.</param>
        /// <param name="start">An initial guess at the parameter values.</param>
        /// <returns>A fit result holding the best-fitting function parameters
        /// and a &#x3C7;<sup>2</sup> test of the quality of the fit.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="function"/> or <paramref name="start"/> is <see langword="null"/>.</exception>
        /// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
        /// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
        /// one or more parameters, or that two or more parameters are linearly dependent.</exception>
        public UncertainMeasurementFitResult FitToFunction(Func <double[], T, double> function, double[] start)
        {
            if (function == null)
            {
                throw new ArgumentNullException(nameof(function));
            }
            if (start == null)
            {
                throw new ArgumentNullException(nameof(start));
            }

            // A fit with fewer data points than parameters is rejected outright.
            if (this.Count < start.Length)
            {
                throw new InsufficientDataException();
            }

            // Build the chi^2 fit metric for this data set and minimize it over the parameters.
            FitMetric<T>  metric  = new FitMetric<T>(this, function);
            SpaceExtremum minimum = FunctionMath.FindMinimum(new Func<double[], double>(metric.Evaluate), start);

            // The covariance matrix is the inverse of half the curvature at the minimum.
            // The decomposition should not be null at a true minimum; if it is, report a singular fit.
            SymmetricMatrix       halfCurvature = 0.5 * minimum.Curvature();
            CholeskyDecomposition decomposition = halfCurvature.CholeskyDecomposition();
            if (decomposition == null)
            {
                throw new DivideByZeroException();
            }
            SymmetricMatrix covariance = decomposition.Inverse();

            // Goodness of fit: right-tailed chi^2 test with (data points - parameters) degrees of freedom.
            ChiSquaredDistribution testDistribution = new ChiSquaredDistribution(this.Count - minimum.Dimension);
            TestResult             test             = new TestResult("χ²", minimum.Value, testDistribution, TestType.RightTailed);

            // Package the best-fit values under generated parameter names together with their covariance.
            ColumnVector        best       = new ColumnVector(minimum.Location(), 0, 1, start.Length, true);
            ParameterCollection parameters = new ParameterCollection(NumberNames(start.Length), best, covariance);

            return new UncertainMeasurementFitResult(parameters, test);
        }
        // We need a goodness-of-fit measurement

        internal LinearLogisticRegressionResult(IReadOnlyList <double> x, IReadOnlyList <bool> y)
        {
            Debug.Assert(x != null);
            Debug.Assert(y != null);
            Debug.Assert(x.Count == y.Count);

            // At least three observations are required.
            int count = x.Count;
            if (count < 3)
            {
                throw new InsufficientDataException();
            }

            // Model:  p_i = \sigma(t_i),  t_i = a + b x_i,  \sigma(t) = \frac{1}{1 + e^{-t}}.
            // Log likelihood of the data:
            //   \ln L = \sum_{{\rm true} i} \ln \sigma(t_i) + \sum_{{\rm false} i} \ln (1 - \sigma(t_i))
            // Since \frac{\sigma'(t)}{\sigma(t)} = \sigma(-t) and \frac{\sigma'(t)}{1 - \sigma(t)} = \sigma(t),
            // the gradient is
            //   \frac{\partial L}{\partial a} = \sum_i \pm \sigma(\mp t_i)
            //   \frac{\partial L}{\partial b} = \sum_i \pm \sigma(\mp t_i) x_i
            // with the upper sign for true values and the lower sign for false values.
            // The likelihood-maximizing (a, b) is the simultaneous zero of these two expressions.
            //
            // The second derivatives, which give the curvature matrix, are
            //   \frac{\partial^2 L}{\partial a^2} = - \sum_i \sigma'(\mp t_i)
            //   \frac{\partial^2 L}{\partial a \partial b} = - \sum_i \sigma'(\mp t_i) x_i
            //   \frac{\partial^2 L}{\partial b^2} = - \sum_i \sigma'(\mp t_i) x_i^2

            // Initial guess. From the logistic Ansatz \frac{p}{1-p} = e^{\alpha + \beta x} one gets
            //   \frac{\partial p}{\partial x} = \beta p (1 - p)
            // Evaluating at the means, taking p (1 - p) = var(y), and using the expansion
            //   cov(p, x) = \frac{\partial p}{\partial x} var(x)
            // gives
            //   \beta = \frac{cov(y, x)}{var(x) var(y)}
            // This gets the sign right but usually not the magnitude: var(y) = p (1 - p) assumes all y
            // are drawn with one fixed p, which is not the case. The analysis needs to be revisited.
            double xMean, yMean, xxSum, yySum, xySum;
            Bivariate.ComputeBivariateMomentsUpToTwo(x, y.Select(flag => flag ? 1.0 : 0.0), out count, out xMean, out yMean, out xxSum, out yySum, out xySum);
            double meanProbability = yMean;
            double slopeGuess      = xySum / xxSum / yySum * count;
            double interceptGuess  = Math.Log(meanProbability / (1.0 - meanProbability)) - slopeGuess * xMean;

            // Gradient of the log likelihood with respect to (a, b).
            Func<IReadOnlyList<double>, IReadOnlyList<double>> gradient = (IReadOnlyList<double> a) =>
            {
                double dLda = 0.0;
                double dLdb = 0.0;
                for (int i = 0; i < count; i++)
                {
                    double xi = x[i];
                    double t  = a[0] + a[1] * xi;
                    // true observations contribute +sigma(-t); false observations contribute -sigma(t)
                    double term = y[i] ? Sigma(-t) : -Sigma(t);
                    dLda += term;
                    dLdb += term * xi;
                }
                return new double[] { dLda, dLdb };
            };

            ColumnVector solution = MultiFunctionMath.FindZero(gradient, new double[] { interceptGuess, slopeGuess });

            // Accumulate the curvature matrix (negated second derivatives) at the solution.
            SymmetricMatrix curvature = new SymmetricMatrix(2);
            for (int i = 0; i < count; i++)
            {
                double xi = x[i];
                double t  = solution[0] + solution[1] * xi;
                if (y[i])
                {
                    t = -t;
                }
                double e      = Math.Exp(-t);
                double weight = e / MoreMath.Sqr(1.0 + e);   // sigma'(t)
                curvature[0, 0] += weight;
                curvature[0, 1] += weight * xi;
                curvature[1, 1] += weight * xi * xi;
            }

            // The covariance is the inverse curvature; a null decomposition is reported as singular.
            CholeskyDecomposition decomposition = curvature.CholeskyDecomposition();
            if (decomposition == null)
            {
                throw new DivideByZeroException();
            }

            best       = solution;
            covariance = decomposition.Inverse();
        }
Example #9
0
        /// <summary>
        /// Finds the Beta distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is <see langword="null"/>.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
        /// <exception cref="InvalidOperationException">Not all the entries in <paramref name="sample" /> lie strictly between zero and one.</exception>
        /// <exception cref="DivideByZeroException">The curvature matrix of the log likelihood is not positive definite at the fitted parameters.</exception>
        public static BetaFitResult FitToBeta(this IReadOnlyList <double> sample)
        {
            if (sample == null)
            {
                throw new ArgumentNullException(nameof(sample));
            }
            if (sample.Count < 3)
            {
                throw new InsufficientDataException();
            }

            // maximum likelihood calculation
            //   \log L = \sum_i \left[ (\alpha-1) \log x_i + (\beta-1) \log (1-x_i) - \log B(\alpha,\beta) \right]
            // using \frac{\partial \log B(a,b)}{\partial a} = \psi(a) - \psi(a+b), we have
            //   \frac{\partial \log L}{\partial \alpha} = \sum_i \log x_i -     N \left[ \psi(\alpha) - \psi(\alpha+\beta) \right]
            //   \frac{\partial \log L}{\partial \beta}  = \sum_i \log (1-x_i) - N \left[ \psi(\beta)  - \psi(\alpha+\beta) \right]
            // set equal to zero to get equations for \alpha, \beta
            //   \psi(\alpha) - \psi(\alpha+\beta) = <\log x>
            //   \psi(\beta) - \psi(\alpha+\beta) = <\log (1-x)>

            // compute the mean log of x and (1-x)
            // these are the (logs of) the geometric means
            double ga = 0.0; double gb = 0.0;

            foreach (double value in sample)
            {
                // values on or outside the boundaries have no finite logarithm of x or (1-x)
                if ((value <= 0.0) || (value >= 1.0))
                {
                    throw new InvalidOperationException();
                }
                ga += Math.Log(value); gb += Math.Log(1.0 - value);
            }
            ga /= sample.Count; gb /= sample.Count;

            // define the function to zero
            Func <IReadOnlyList <double>, IReadOnlyList <double> > f = delegate(IReadOnlyList <double> x) {
                double pab = AdvancedMath.Psi(x[0] + x[1]);
                return(new double[] {
                    AdvancedMath.Psi(x[0]) - pab - ga,
                    AdvancedMath.Psi(x[1]) - pab - gb
                });
            };

            // guess initial values using the method of moments
            //   M1 = \frac{\alpha}{\alpha+\beta} C2 = \frac{\alpha\beta}{(\alpha+\beta)^2 (\alpha+\beta+1)}
            // implies
            //   \alpha = M1 \left( \frac{M1 (1-M1)}{C2} - 1 \right)
            //   \beta = (1 - M1) \left( \frac{M1 (1-M1)}{C2} -1 \right)
            int    n;
            double m, v;

            ComputeMomentsUpToSecond(sample, out n, out m, out v);
            v = v / n;

            double mm = 1.0 - m;
            double q  = m * mm / v - 1.0;

            double[] x0 = new double[] { m *q, mm *q };

            // find the parameter values that zero the two equations
            ColumnVector ab = MultiFunctionMath.FindZero(f, x0);
            double       a = ab[0]; double b = ab[1];

            // take more derivatives of \log L to get the second derivatives
            //   \frac{\partial^2 \log L}{\partial\alpha^2} = - N \left[ \psi'(\alpha) - \psi'(\alpha+\beta) \right]
            //   \frac{\partial^2 \log L}{\partial\beta^2}  = - N \left[ \psi'(\beta)  - \psi'(\alpha+\beta) \right]
            //   \frac{\partial^2 \log L}{\partial \alpha \partial \beta} = + N \psi'(\alpha+\beta)
            // The curvature matrix is the negative of this Hessian, so its off-diagonal element is
            // - N \psi'(\alpha+\beta). The covariance matrix is the inverse of the curvature matrix;
            // getting the off-diagonal sign wrong would flip the sign of the reported
            // \alpha-\beta covariance while leaving the variances unchanged.
            double          psi1ab = AdvancedMath.Psi(1, a + b);
            SymmetricMatrix C      = new SymmetricMatrix(2);
            C[0, 0] = sample.Count * (AdvancedMath.Psi(1, a) - psi1ab);
            C[1, 1] = sample.Count * (AdvancedMath.Psi(1, b) - psi1ab);
            C[0, 1] = -sample.Count * psi1ab;
            CholeskyDecomposition CD = C.CholeskyDecomposition();
            if (CD == null)
            {
                throw new DivideByZeroException();
            }
            C = CD.Inverse();

            // do a KS test on the result
            BetaDistribution distribution = new BetaDistribution(a, b);
            TestResult       test         = sample.KolmogorovSmirnovTest(distribution);

            return(new BetaFitResult(ab, C, distribution, test));
        }
Example #10
0
        /// <summary>
        /// Finds the Gamma distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
        public static GammaFitResult FitToGamma(this IReadOnlyList <double> sample)
        {
            if (sample == null)
            {
                throw new ArgumentNullException(nameof(sample));
            }
            if (sample.Count < 3)
            {
                throw new InsufficientDataException();
            }

            // With shape k and scale s, the log likelihood of the sample is
            //   \log L = (k-1) \sum_i \log x_i - \frac{1}{s} \sum_i x_i - N \log \Gamma(k) - N k \log s
            // and its first derivatives are
            //   \frac{\partial \log L}{\partial s} = \frac{1}{s^2} \sum_i x_i - \frac{N k}{s}
            //   \frac{\partial \log L}{\partial k} = \sum_i \log x_i - N \psi(k) - N \log s
            // Setting both to zero gives
            //   k s = <x>
            //   \psi(k) + \log s = <\log x>
            // and eliminating s leaves one equation in k alone:
            //   \log k - \psi(k) = \log <x> - <\log x>
            // The right-hand side is a constant of the sample. Since \log k > \psi(k) for all k it
            // had better be positive; the two sides approach each other for large k, so a smaller
            // right-hand side yields a larger k.

            int    count;
            double mean, squares;
            ComputeMomentsUpToSecond(sample, out count, out mean, out squares);
            // presumably `squares` is the sum of squared deviations, making this the variance - TODO confirm
            double variance = squares / count;

            // Accumulate \sum \log x_i, rejecting values for which the logarithm is undefined.
            double logTotal = 0.0;
            foreach (double value in sample)
            {
                if (value <= 0.0)
                {
                    throw new InvalidOperationException();
                }
                logTotal += Math.Log(value);
            }
            // right-hand side: \log <x> - <\log x>
            double target = Math.Log(mean) - logTotal / count;

            // Method-of-moments starting point: k = \frac{\mu^2}{\sigma^2}.
            double shapeGuess = MoreMath.Sqr(mean) / variance;

            // Because 1/(2k) < \log(k) - \psi(k) < 1/k, a bracket could be derived; that might be
            // better for keeping the solver out of k < 0 territory.
            double shape = FunctionMath.FindZero(k => (Math.Log(k) - AdvancedMath.Psi(k) - target), shapeGuess);
            double scale = mean / shape;

            // Second derivatives of the log likelihood at the solution:
            //   \frac{\partial^2 \log L}{\partial s^2} = -\frac{2}{s^3} \sum_i x_i + \frac{Nk}{s^2} = - \frac{Nk}{s^2}
            //   \frac{\partial^2 \log L}{\partial k \partial s} = - \frac{N}{s}
            //   \frac{\partial^2 \log L}{\partial k^2} = - N \psi'(k)
            // Negating them gives the curvature matrix, whose inverse is the covariance matrix.
            SymmetricMatrix curvature = new SymmetricMatrix(2);
            curvature[0, 0] = count * AdvancedMath.Psi(1, shape);
            curvature[0, 1] = count / scale;
            curvature[1, 1] = count * shape / MoreMath.Sqr(scale);

            CholeskyDecomposition decomposition = curvature.CholeskyDecomposition();
            if (decomposition == null)
            {
                throw new DivideByZeroException();
            }
            SymmetricMatrix covariance = decomposition.Inverse();

            // Kolmogorov-Smirnov test for goodness-of-fit.
            GammaDistribution distribution = new GammaDistribution(shape, scale);
            TestResult        test         = sample.KolmogorovSmirnovTest(distribution);

            return new GammaFitResult(shape, scale, covariance, distribution, test);
        }
Example #11
0
        // The internal linear regression routine, which assumes inputs are entirely valid.
        // Regresses the column at outputIndex on all the other columns plus an intercept; in the
        // returned parameter vector the slot at outputIndex holds the intercept and every other
        // slot holds the coefficient of the corresponding column. The result also carries the
        // parameter covariance matrix and an F test comparing explained to unexplained variance.
        // Throws InsufficientDataException if there are fewer data points than parameters, and
        // DivideByZeroException if the design matrix is not positive definite.

        private FitResult LinearRegression_Internal(int outputIndex)
        {
            // to do a fit, we need at least as many data points as parameters
            // NOTE(review): Count == Dimension passes this check but leaves zero unexplained
            // degrees of freedom below - confirm whether equality should be rejected as well.
            if (Count < Dimension)
            {
                throw new InsufficientDataException();
            }

            // construct the design matrix; the row/column at outputIndex plays the role of
            // the constant (intercept) term, so its entries are counts and column sums
            SymmetricMatrix D = new SymmetricMatrix(Dimension);

            for (int i = 0; i < Dimension; i++)
            {
                for (int j = 0; j <= i; j++)
                {
                    if (i == outputIndex)
                    {
                        if (j == outputIndex)
                        {
                            D[i, j] = Count;
                        }
                        else
                        {
                            D[i, j] = storage[j].Mean * Count;
                        }
                    }
                    else
                    {
                        if (j == outputIndex)
                        {
                            D[i, j] = storage[i].Mean * Count;
                        }
                        else
                        {
                            double Dij = 0.0;
                            for (int k = 0; k < Count; k++)
                            {
                                Dij += storage[i][k] * storage[j][k];
                            }
                            D[i, j] = Dij;
                        }
                    }
                }
            }

            // construct the right hand side
            ColumnVector b = new ColumnVector(Dimension);

            for (int i = 0; i < Dimension; i++)
            {
                if (i == outputIndex)
                {
                    b[i] = storage[i].Mean * Count;
                }
                else
                {
                    double bi = 0.0;
                    for (int k = 0; k < Count; k++)
                    {
                        bi += storage[outputIndex][k] * storage[i][k];
                    }
                    b[i] = bi;
                }
            }

            // solve the system for the linear model parameters
            CholeskyDecomposition CD = D.CholeskyDecomposition();

            // a null decomposition means the design matrix is not positive definite; report it
            // the same way the other fit routines do instead of failing with a null dereference
            if (CD == null)
            {
                throw new DivideByZeroException();
            }
            ColumnVector parameters = CD.Solve(b);

            // find total sum of squares, with dof = # points - 1 (minus one for the variance-minimizing mean)
            double totalSumOfSquares = storage[outputIndex].Variance * Count;

            // find remaining unexplained sum of squares, with dof = # points - # parameters
            double unexplainedSumOfSquares = 0.0;

            for (int r = 0; r < Count; r++)
            {
                // model prediction for row r: intercept plus coefficient-weighted inputs
                double y = 0.0;
                for (int c = 0; c < Dimension; c++)
                {
                    if (c == outputIndex)
                    {
                        y += parameters[c];
                    }
                    else
                    {
                        y += parameters[c] * storage[c][r];
                    }
                }
                unexplainedSumOfSquares += MoreMath.Sqr(y - storage[outputIndex][r]);
            }
            int    unexplainedDegreesOfFreedom = Count - Dimension;
            double unexplainedVariance         = unexplainedSumOfSquares / unexplainedDegreesOfFreedom;

            // find explained sum of squares, with dof = # parameters - 1
            double explainedSumOfSquares     = totalSumOfSquares - unexplainedSumOfSquares;
            int    explainedDegreesOfFreedom = Dimension - 1;
            double explainedVariance         = explainedSumOfSquares / explainedDegreesOfFreedom;

            // compute F statistic from sums of squares
            double       F             = explainedVariance / unexplainedVariance;
            Distribution fDistribution = new FisherDistribution(explainedDegreesOfFreedom, unexplainedDegreesOfFreedom);

            // parameter covariance: inverse design matrix scaled by the unexplained variance
            SymmetricMatrix covariance = unexplainedVariance * CD.Inverse();

            return(new FitResult(parameters, covariance, new TestResult("F", F, TestType.RightTailed, fDistribution)));
        }
        /// <summary>
        /// Experimental routine: draws pairs of normal deviates, thresholds them to 0/1, re-colors
        /// them towards a target 2x2 covariance, thresholds again, and computes the covariance of
        /// the result. Nothing is returned; the computed matrices are only held in locals.
        /// </summary>
        /// <param name="howMany">The number of two-component rows to generate.</param>
        public static void GenerateValues2(int howMany)
        {
            // two deviates are needed per row
            var distribution = new NormalDistribution(0, Math.Sqrt(1));
            var randomValues = distribution.Generate(howMany * 2);

            var meanA = 0.7;
            var meanB = 0.2;

            // target covariance for the re-colored values
            var sigma = new double[2, 2];

            sigma[0, 0] = 1;
            sigma[0, 1] = 0.7;
            sigma[1, 0] = 0.7;
            sigma[1, 1] = 1;

            var randomValuesMatrix = new double[howMany, 2];

            // Fill every row from consecutive pairs of deviates. Stopping the scan at howMany
            // deviates would fill only about half of the rows and leave the rest at zero.
            for (int row = 0; row < howMany; row++)
            {
                double shiftedA = randomValues.ElementAt(2 * row) - meanA;
                double shiftedB = randomValues.ElementAt(2 * row + 1) - meanB;

                // threshold the shifted deviates to 0/1
                randomValuesMatrix[row, 0] = shiftedA > 0 ? 1 : 0;
                randomValuesMatrix[row, 1] = shiftedB > 0 ? 1 : 0;
            }

            // whiten with the inverse Cholesky factor of the sample covariance, then re-color
            // with the Cholesky factor of the target covariance
            var randomValuesMatrixCov = randomValuesMatrix.Covariance();
            var cholCovX    = new CholeskyDecomposition(randomValuesMatrixCov).LeftTriangularFactor.Transpose();
            var invCholCovX = cholCovX.Inverse();

            var dottedInverse = randomValuesMatrix.Dot(invCholCovX);
            var result        = dottedInverse.Dot(new CholeskyDecomposition(sigma).LeftTriangularFactor.Transpose());

            // covariance before the final thresholding (kept for inspection only)
            var resultSigma = result.Covariance();

            for (int row = 0; row < howMany; row++)
            {
                result[row, 0] = result[row, 0] > 0 ? 1 : 0;
                result[row, 1] = result[row, 1] > 0 ? 1 : 0;
            }

            // covariance of the thresholded values (kept for inspection only)
            var booleansigma = result.Covariance();
        }
Example #13
0
        /// <summary>
        /// Fits a multi-linear logistic model to the given data by maximizing the log likelihood
        /// over the parameter set, starting the search from all-zero parameters.
        /// </summary>
        /// <param name="yColumn">The boolean outcome for each observation.</param>
        /// <param name="xColumns">The input columns. A <see langword="null"/> entry marks the position of the
        /// intercept parameter; debug builds assert that exactly one such entry exists and that its name
        /// is "Intercept".</param>
        /// <param name="xNames">The parameter names, one for each entry of <paramref name="xColumns"/>.</param>
        /// <exception cref="InsufficientDataException">There are not more observations than parameters.</exception>
        /// <exception cref="DimensionMismatchException">A non-null input column does not have the same number
        /// of entries as <paramref name="yColumn"/>.</exception>
        /// <exception cref="DivideByZeroException">The Hessian at the maximum has no Cholesky decomposition.</exception>
        internal MultiLinearLogisticRegressionResult(IReadOnlyList <bool> yColumn, IReadOnlyList <IReadOnlyList <double> > xColumns, IReadOnlyList <string> xNames)
        {
            Debug.Assert(yColumn != null);
            Debug.Assert(xColumns != null);
            Debug.Assert(xNames != null);
            Debug.Assert(xColumns.Count == xNames.Count);

            int n = yColumn.Count;
            int m = xColumns.Count;

            if (n <= m)
            {
                throw new InsufficientDataException();
            }

            // Locate the intercept placeholder and verify the length of every real column.
            interceptIndex = -1;
            for (int c = 0; c < m; c++)
            {
                IReadOnlyList <double> xColumn = xColumns[c];
                if (xColumn == null)
                {
                    Debug.Assert(interceptIndex < 0);
                    Debug.Assert(xNames[c] == "Intercept");
                    interceptIndex = c;
                }
                else
                {
                    if (xColumn.Count != n)
                    {
                        throw new DimensionMismatchException();
                    }
                }
            }
            Debug.Assert(interceptIndex >= 0);


            // Define the log likelihood as a function of the parameter set
            Func <IReadOnlyList <double>, double> logLikelihood = (IReadOnlyList <double> a) => {
                Debug.Assert(a != null);
                Debug.Assert(a.Count == m);

                double L = 0.0;
                for (int k = 0; k < n; k++)
                {
                    // Linear predictor for observation k.
                    double t = 0.0;
                    for (int i = 0; i < m; i++)
                    {
                        if (i == interceptIndex)
                        {
                            t += a[i];
                        }
                        else
                        {
                            t += a[i] * xColumns[i][k];
                        }
                    }

                    // A true outcome contributes -log(1 + e^(-t)); a false outcome contributes -log(1 + e^(t)).
                    double s = yColumn[k] ? -t : t;

                    // Evaluate log(1 + e^s) without exponentiating a large positive number:
                    // Math.Exp overflows to infinity for arguments above roughly 709, which would
                    // turn a finite log likelihood into negative infinity.
                    if (s > 0.0)
                    {
                        L -= s + MoreMath.LogOnePlus(Math.Exp(-s));
                    }
                    else
                    {
                        L -= MoreMath.LogOnePlus(Math.Exp(s));
                    }
                }
                return(L);
            };

            // TODO: we need a better starting value; for now the search starts from all zeros.
            double[] start = new double[m];

            // Search out the likelihood-maximizing parameter set.
            MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);

            b = maximum.Location;
            CholeskyDecomposition CD = maximum.HessianMatrix.CholeskyDecomposition();

            // A null decomposition means the Hessian could not be factored, so it cannot be inverted.
            if (CD == null)
            {
                throw new DivideByZeroException();
            }
            C = CD.Inverse();

            names = xNames;
        }
        /// <summary>
        /// Performs a linear logistic regression analysis.
        /// </summary>
        /// <param name="outputIndex">The index of the column to predict.</param>
        /// <returns>A logistic multi-linear model fit. The kth parameter is the slope of the multi-linear model with respect to
        /// the kth column, except for k equal to the <paramref name="outputIndex"/>, for which it is the intercept.</returns>
        /// <remarks>Logistic linear regression is suited to situations where multiple input variables, either continuous or binary indicators, are used to predict
        /// the value of a binary output variable. Like a linear regression, a logistic linear regression tries to find a model that predicts the output variable using
        /// a linear combination of input variables. Unlike a simple linear regression, the model does not assume that this linear
        /// function predicts the output directly; instead it assumes that this function value is then fed into a logit link function, which
        /// maps the real numbers into the interval (0, 1), and interprets the value of this link function as the probability of obtaining success value
        /// for the output variable.</remarks>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="outputIndex"/> is negative or not less than the sample dimension.</exception>
        /// <exception cref="InvalidOperationException">The column to be predicted contains values other than 0 and 1.</exception>
        /// <exception cref="InsufficientDataException">There are not more rows in the sample than columns.</exception>
        /// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
        /// one or more parameters, or that two or more parameters are linearly dependent.</exception>
        public FitResult LogisticLinearRegression(int outputIndex)
        {
            if ((outputIndex < 0) || (outputIndex >= this.Dimension))
            {
                throw new ArgumentOutOfRangeException(nameof(outputIndex));
            }
            if (this.Count <= this.Dimension)
            {
                throw new InsufficientDataException();
            }

            // Validate the output column once, up front, instead of on every likelihood evaluation.
            // Written with != so that NaN is rejected as well.
            for (int k = 0; k < this.Count; k++)
            {
                double y = this.storage[outputIndex][k];
                if ((y != 0.0) && (y != 1.0))
                {
                    throw new InvalidOperationException();
                }
            }

            // Define the log likelihood as a function of the parameter set
            Func <IList <double>, double> logLikelihood = (IList <double> a) => {
                double L = 0.0;
                for (int k = 0; k < this.Count; k++)
                {
                    // Linear predictor; the slot of the output column holds the intercept.
                    double z = 0.0;
                    for (int i = 0; i < this.storage.Length; i++)
                    {
                        if (i == outputIndex)
                        {
                            z += a[i];
                        }
                        else
                        {
                            z += a[i] * this.storage[i][k];
                        }
                    }

                    // An output of 0 contributes -log(1 + e^z); an output of 1 contributes -log(1 + e^(-z)).
                    double s = (this.storage[outputIndex][k] == 0.0) ? z : -z;

                    // log(1 + e^s) = max(s, 0) + log(1 + e^(-|s|)). This form never exponentiates a
                    // positive number, so it cannot overflow to infinity for large |s| the way
                    // Math.Log(1.0 + Math.Exp(s)) does.
                    L -= Math.Max(s, 0.0) + Math.Log(1.0 + Math.Exp(-Math.Abs(s)));
                }
                return(L);
            };

            // The search starts from all-zero parameters.
            double[] start = new double[this.Dimension];

            MultiExtremum         maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);
            CholeskyDecomposition CD      = maximum.HessianMatrix.CholeskyDecomposition();

            // A null decomposition means the curvature matrix could not be factored, so it cannot be inverted.
            if (CD == null)
            {
                throw new DivideByZeroException();
            }

            FitResult result = new FitResult(maximum.Location, CD.Inverse(), null);

            return(result);
        }