/// <summary>
///   Stores the requested dimension, allocates the per-dimension buffers
///   and, when supplied, appends the initial samples.
/// </summary>
///
/// <param name="dimension">The value assigned to <c>m_dimension</c>; it also sizes
///   <c>m_pmax</c> and <c>Distributions</c>.</param>
/// <param name="samples">Optional initial samples added to <c>Samples</c>.
///   Nothing is added when this is <c>null</c>.</param>
///
private void _Initialize(int dimension, double[][] samples)
{
    m_dimension = dimension;

    // One slot per dimension in each of the two buffers.
    m_pmax = new double[m_dimension];
    Distributions = new IFittableDistribution<double>[m_dimension];

    if (samples == null)
    {
        return;
    }

    Samples.AddRange(samples);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="weights">The weight vector containing the weight for each of the samples.
///   Must be <c>null</c> when the logarithm option is enabled.</param>
/// <param name="options">Optional arguments which may be used during fitting, such
///   as regularization constants and additional parameters. May be <c>null</c>.</param>
///
/// <exception cref="ArgumentException">Weights were supplied while
///   <c>options.Logarithm</c> is enabled.</exception>
///
public void Fit(double[] observations, double[] weights, MixtureOptions options)
{
    // Expose every mixture component through its fittable interface.
    var fittable = new IFittableDistribution<double>[coefficients.Length];
    for (int c = 0; c < components.Length; c++)
    {
        fittable[c] = (IFittableDistribution<double>)components[c];
    }

    bool hasOptions = options != null;
    bool useLogarithm = hasOptions && options.Logarithm;

    if (!useLogarithm)
    {
        // Standard EM; per-sample weights are supported here.
        var estimator = new ExpectationMaximization<double>(coefficients, fittable);

        if (hasOptions)
        {
            estimator.InnerOptions = options.InnerOptions;
            estimator.Convergence.Iterations = options.Iterations;
            estimator.Convergence.Tolerance = options.Threshold;
        }

        estimator.Compute(observations, weights);
    }
    else
    {
        if (weights != null)
        {
            throw new ArgumentException("The model fitting algorithm does not"
                + " currently support different weights when the logarithm option"
                + " is enabled. To avoid this exception, pass 'null' as the second"
                + " parameter's value when calling this method.");
        }

        // Log-domain EM. The options object is necessarily non-null on this
        // path, because the logarithm flag was read from it.
        var estimator = new LogExpectationMaximization<double>(coefficients, fittable);
        estimator.InnerOptions = options.InnerOptions;
        estimator.Convergence.Iterations = options.Iterations;
        estimator.Convergence.Tolerance = options.Threshold;

        estimator.Compute(observations);
    }

    // Store the fitted components back, both in the component list and in the cache.
    for (int c = 0; c < components.Length; c++)
    {
        T fitted = (T)fittable[c];
        components[c] = fitted;
        cache[c] = fitted;
    }

    this.initialize();
}
/// <summary>
///   Initializes a new instance of the <see cref="DistributionAnalysis"/> class.
/// </summary>
///
/// <param name="observations">The observations to be fitted against candidate distributions.
///   The array is stored by reference; no copy is made.</param>
///
public DistributionAnalysis(double[] observations)
{
    this.data = observations;

    // Default set of candidate distributions, each created with its
    // parameterless constructor.
    var candidates = new IFittableDistribution<double>[5];
    candidates[0] = new NormalDistribution();
    candidates[1] = new UniformContinuousDistribution();
    candidates[2] = new GammaDistribution();
    candidates[3] = new GumbelDistribution();
    candidates[4] = new PoissonDistribution();

    Distributions = candidates;
}
/// <summary>
///   Runs the Expectation-Maximization loop over the given (optionally weighted)
///   observations until <c>Convergence.HasConverged</c> is true, then writes the
///   estimated mixing coefficients and components back to <c>Coefficients</c>
///   and <c>Distributions</c>.
/// </summary>
///
/// <param name="observations">The observations to fit the mixture against.</param>
/// <param name="weights">Optional per-observation weights. When <c>null</c>, the
///   responsibilities are left unweighted and the weight sum passed to
///   <c>LogLikelihood</c> is 1.</param>
///
/// <returns>The last log-likelihood value stored in <c>Convergence.NewValue</c>.</returns>
///
/// <exception cref="NonPositiveDefiniteMatrixException">Rethrown when any component's
///   maximization step fails with this exception.</exception>
/// <exception cref="ConvergenceException">The final log-likelihood is NaN or infinite.</exception>
///
private double compute(TObservation[] observations, double[] weights)
{
    // Estimation parameters
    double[] coefficients = Coefficients;
    var components = Distributions;

    double weightSum = 1;
    if (weights != null)
    {
        weightSum = weights.Sum();
    }

    // 1. Initialize the mixing coefficients and evaluate
    //    the initial value of the log-likelihood
    int N = observations.Length;

    // Initialize responsibilities: one row per component, one column per observation
    double[] norms = new double[N];
    for (int k = 0; k < Gamma.Length; k++)
    {
        Gamma[k] = new double[N];
    }

    // Work on a copy of the coefficients so that Coefficients is only
    // overwritten after the loop finishes successfully.
    double[] pi = (double[])coefficients.Clone();

    // The components are viewed through their fittable interface; no copy
    // of the component objects is made here.
    var pdf = new IFittableDistribution<TObservation>[components.Length];
    for (int i = 0; i < components.Length; i++)
    {
        pdf[i] = (IFittableDistribution<TObservation>)components[i];
    }

    // Prepare the iteration
    Convergence.NewValue = LogLikelihood(pi, pdf, observations, weights, weightSum);

    // Start
    do
    {
        // 2. Expectation: Evaluate the component distributions
        //    responsibilities using the current parameter values.
        Parallel.For(0, Gamma.Length, k =>
        {
            double[] gammak = Gamma[k];
            for (int i = 0; i < observations.Length; i++)
            {
                gammak[i] = pi[k] * pdf[k].ProbabilityFunction(observations[i]);
            }
        });

        // Per-observation normalization constant (sum over all components)
        Parallel.For(0, norms.Length, i =>
        {
            double sum = 0;
            for (int k = 0; k < Gamma.Length; k++)
            {
                sum += Gamma[k][i];
            }

            norms[i] = sum;
        });

        try
        {
            Parallel.For(0, Gamma.Length, k =>
            {
                double[] gammak = Gamma[k];

                // Normalize; an observation with zero total density gets zero responsibility
                for (int i = 0; i < gammak.Length; i++)
                {
                    gammak[i] = (norms[i] != 0) ? gammak[i] / norms[i] : 0;
                }

                if (weights != null)
                {
                    for (int i = 0; i < weights.Length; i++)
                    {
                        gammak[i] *= weights[i];
                    }
                }

                double sum = gammak.Sum();

                // Treat a non-finite total as an empty component
                if (Double.IsInfinity(sum) || Double.IsNaN(sum))
                {
                    sum = 0;
                }

                // 3. Maximization: Re-estimate the distribution parameters
                //    using the previously computed responsibilities
                pi[k] = sum;

                if (sum == 0)
                {
                    // Nothing was assigned to this component; leave it unfitted
                    return;
                }

                // Rescale so the weights handed to the component sum to one
                for (int i = 0; i < gammak.Length; i++)
                {
                    gammak[i] /= sum;
                }

                pdf[k].Fit(observations, gammak, InnerOptions);
            });
        }
        catch (AggregateException ex)
        {
            // Parallel.For collects the failures of every iteration, so the
            // exception of interest is not necessarily the first one. Scan
            // all (flattened) inner exceptions instead of only InnerException.
            foreach (Exception inner in ex.Flatten().InnerExceptions)
            {
                if (inner is NonPositiveDefiniteMatrixException)
                {
                    throw inner;
                }
            }

            // NOTE(review): any other failure is still ignored here, as in the
            // previous implementation - confirm that this is intentional.
        }

        // Renormalize the mixing coefficients
        double sumPi = pi.Sum();
        for (int i = 0; i < pi.Length; i++)
        {
            pi[i] /= sumPi;
        }

        // 4. Evaluate the log-likelihood and check for convergence
        Convergence.NewValue = LogLikelihood(pi, pdf, observations, weights, weightSum);

    } while (!Convergence.HasConverged);

    double newLikelihood = Convergence.NewValue;
    if (Double.IsNaN(newLikelihood) || Double.IsInfinity(newLikelihood))
    {
        throw new ConvergenceException("Fitting did not converge.");
    }

    // Become the newly fitted distribution.
    for (int i = 0; i < pi.Length; i++)
    {
        Coefficients[i] = pi[i];
    }

    for (int i = 0; i < pdf.Length; i++)
    {
        Distributions[i] = pdf[i];
    }

    return newLikelihood;
}
/// <summary>
///   Runs the Expectation-Maximization loop in the log domain over the given
///   observations until <c>Convergence.HasConverged</c> is true, then writes the
///   estimated mixing coefficients and components back to <c>Coefficients</c>
///   and <c>Distributions</c>.
/// </summary>
///
/// <param name="observations">The observations to fit the mixture against.</param>
///
/// <returns>The last log-likelihood value stored in <c>Convergence.NewValue</c>.</returns>
///
/// <exception cref="NonPositiveDefiniteMatrixException">Rethrown when any component's
///   maximization step fails with this exception.</exception>
/// <exception cref="ConvergenceException">The final log-likelihood is NaN or infinite.</exception>
///
private double compute(TObservation[] observations)
{
    // Estimation parameters
    double[] coefficients = Coefficients;
    var components = Distributions;

    // 1. Initialize the mixing coefficients and evaluate
    //    the initial value of the log-likelihood
    int N = observations.Length;

    // Initialize log-responsibilities: one row per component, one column per observation
    double[] lnnorms = new double[N];
    for (int k = 0; k < LogGamma.Length; k++)
    {
        LogGamma[k] = new double[N];
    }

    // Log of the current mixing coefficients, used as the working copy
    double[] logPi = Matrix.Log(coefficients);

    // The components are viewed through their fittable interface; no copy
    // of the component objects is made here.
    var pdf = new IFittableDistribution<TObservation>[components.Length];
    for (int i = 0; i < components.Length; i++)
    {
        pdf[i] = (IFittableDistribution<TObservation>)components[i];
    }

    // Prepare the iteration
    Convergence.NewValue = LogLikelihood(logPi, pdf, observations);

    // Start
    do
    {
        // 2. Expectation: Evaluate the component distributions
        //    responsibilities using the current parameter values.
        Parallel.For(0, LogGamma.Length, k =>
        {
            double[] logGammak = LogGamma[k];
            for (int i = 0; i < observations.Length; i++)
            {
                logGammak[i] = logPi[k] + pdf[k].LogProbabilityFunction(observations[i]);
            }
        });

        // Per-observation log-normalization constant (log-sum over all components)
        Parallel.For(0, lnnorms.Length, i =>
        {
            double lnsum = Double.NegativeInfinity;
            for (int k = 0; k < LogGamma.Length; k++)
            {
                lnsum = Special.LogSum(lnsum, LogGamma[k][i]);
            }

            lnnorms[i] = lnsum;
        });

        try
        {
            Parallel.For(0, LogGamma.Length, k =>
            {
                double[] lngammak = LogGamma[k];

                double lnsum = Double.NegativeInfinity;
                for (int i = 0; i < lngammak.Length; i++)
                {
                    // An observation whose normalizer is -infinity gets a
                    // log-responsibility of -infinity
                    double value = double.NegativeInfinity;
                    if (lnnorms[i] != Double.NegativeInfinity)
                    {
                        value = lngammak[i] - lnnorms[i];
                    }

                    lngammak[i] = value;
                    lnsum = Special.LogSum(lnsum, value);
                }

                // Treat a NaN total as an empty component
                if (Double.IsNaN(lnsum))
                {
                    lnsum = Double.NegativeInfinity;
                }

                // 3. Maximization: Re-estimate the distribution parameters
                //    using the previously computed responsibilities
                logPi[k] = lnsum;

                if (lnsum == Double.NegativeInfinity)
                {
                    // Nothing was assigned to this component; leave it unfitted
                    return;
                }

                // Leave the log domain: from here on this row of LogGamma holds
                // linear-scale weights, which is what Fit receives.
                for (int i = 0; i < lngammak.Length; i++)
                {
                    lngammak[i] = Math.Exp(lngammak[i] - lnsum);
                }

                pdf[k].Fit(observations, lngammak, InnerOptions);
            });
        }
        catch (AggregateException ex)
        {
            // Parallel.For collects the failures of every iteration, so the
            // exception of interest is not necessarily the first one. Scan
            // all (flattened) inner exceptions instead of only InnerException.
            foreach (Exception inner in ex.Flatten().InnerExceptions)
            {
                if (inner is NonPositiveDefiniteMatrixException)
                {
                    throw inner;
                }
            }

            // NOTE(review): any other failure is still ignored here, as in the
            // previous implementation - confirm that this is intentional.
        }

        // Renormalize the log mixing coefficients
        double lnsumPi = Double.NegativeInfinity;
        for (int i = 0; i < logPi.Length; i++)
        {
            lnsumPi = Special.LogSum(lnsumPi, logPi[i]);
        }

        for (int i = 0; i < logPi.Length; i++)
        {
            logPi[i] -= lnsumPi;
        }

        // 4. Evaluate the log-likelihood and check for convergence
        Convergence.NewValue = LogLikelihood(logPi, pdf, observations);

    } while (!Convergence.HasConverged);

    double newLikelihood = Convergence.NewValue;
    if (Double.IsNaN(newLikelihood) || Double.IsInfinity(newLikelihood))
    {
        throw new ConvergenceException("Fitting did not converge.");
    }

    // Become the newly fitted distribution.
    for (int i = 0; i < logPi.Length; i++)
    {
        Coefficients[i] = Math.Exp(logPi[i]);
    }

    for (int i = 0; i < pdf.Length; i++)
    {
        Distributions[i] = pdf[i];
    }

    return newLikelihood;
}