/// <summary>
///   Estimates the parameters of this distribution from a set of observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight associated with each of the samples.</param>
/// <param name="options">Optional fitting arguments. They are forwarded as
///   <see cref="EmpiricalHazardOptions"/>; any other kind of options object
///   is forwarded as <c>null</c>.</param>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    EmpiricalHazardOptions hazardOptions = options as EmpiricalHazardOptions;

    Fit(observations, weights, hazardOptions);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations.
/// </summary>
///
/// <param name="observations">The observed symbols. Each value is truncated to an
///   integer and used directly as an index into the probability table.</param>
/// <param name="weights">The weight vector containing the weight for each of the
///   samples, or <c>null</c> to give every sample the same weight
///   (one over the number of observations).</param>
/// <param name="options">Not used by this method.</param>
///
/// <exception cref="ArgumentException">
///   A weight vector was given but its length differs from the number of observations.
/// </exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    if (weights != null && observations.Length != weights.Length)
    {
        throw new ArgumentException("The weight vector should have the same size as the observations", "weights");
    }

    // Without explicit weights every sample contributes equally. The uniform
    // weights sum to one, like a normalized weight vector would.
    double uniformWeight = 1.0 / observations.Length;

    double[] p = new double[probabilities.Length];

    for (int i = 0; i < observations.Length; i++)
    {
        int symbol = (int)observations[i];
        p[symbol] += (weights != null) ? weights[i] : uniformWeight;
    }

    initialize(0, p);
}
/// <summary>
///   Creates a new <see cref="IndependentOptions"/> object from an array of
///   inner fitting options.
/// </summary>
///
/// <param name="innerOptions">The fitting options for the inner component
///   distributions of the independent distributions. The array is stored
///   as given in <c>InnerOptions</c>.</param>
///
public IndependentOptions(IFittingOptions[] innerOptions)
{
    this.InnerOptions = innerOptions;
}
/// <summary>
///   Estimates the parameters of this distribution from unweighted observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="options">Optional arguments which may be used during fitting,
///   such as regularization constants and additional parameters.</param>
///
/// <remarks>
///   This is a convenience overload: it forwards to the weighted overload
///   passing <c>null</c> as the weight vector.
/// </remarks>
///
public virtual void Fit(double[] observations, IFittingOptions options)
{
    this.Fit(observations, null, options);
}
/// <summary>
///   Untyped entry point: fits the distribution to observations given as a
///   generic <see cref="Array"/>.
/// </summary>
///
/// <param name="observations">Either a <c>double[][]</c>, which is used as is,
///   or a <c>double[]</c>, which is first converted through
///   <c>Split(dimension)</c>.</param>
/// <param name="weights">The weight vector containing the weight for each of the samples.</param>
/// <param name="options">Optional arguments which may be used during fitting,
///   such as regularization constants and additional parameters.</param>
///
/// <exception cref="ArgumentException">
///   <paramref name="observations"/> is neither a <c>double[][]</c> nor a <c>double[]</c>.
/// </exception>
///
void IDistribution.Fit(Array observations, double[] weights, IFittingOptions options)
{
    double[][] samples = observations as double[][];

    if (samples == null)
    {
        // Not jagged: the only other accepted layout is a flat vector.
        double[] flat = observations as double[];

        if (flat == null)
        {
            throw new ArgumentException("Unsupported parameter type.", "observations");
        }

        samples = flat.Split(dimension);
    }

    Fit(samples, weights, options);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations.
/// </summary>
///
/// <param name="observations">The observed sample vectors to fit the model against.</param>
/// <param name="weights">The weight vector containing the weight for each of the
///   samples, or <c>null</c> to use the unweighted estimators.</param>
/// <param name="options">Optional <see cref="NormalOptions"/>. When <c>Diagonal</c>
///   is set only the variances are estimated (diagonal covariance); when
///   <c>Regularization</c> is positive it is added to the covariance diagonal
///   until the matrix becomes positive definite. Other option types are ignored.</param>
///
/// <exception cref="NonPositiveDefiniteMatrixException">
///   The estimated covariance matrix is not positive definite.
/// </exception>
///
public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
{
    double[] means;
    double[,] cov;

    NormalOptions opt = options as NormalOptions;

    if (weights != null)
    {
#if DEBUG
        double sum = 0;
        for (int i = 0; i < weights.Length; i++)
        {
            if (Double.IsNaN(weights[i]) || Double.IsInfinity(weights[i]))
            {
                throw new Exception("Invalid numbers in the weight vector.");
            }

            sum += weights[i];
        }

        if (Math.Abs(sum - 1.0) > 1e-10)
        {
            throw new Exception("Weights do not sum to one.");
        }
#endif
        // Compute weighted mean vector
        means = Statistics.Tools.Mean(observations, weights);

        // Compute weighted covariance matrix
        if (opt != null && opt.Diagonal)
        {
            cov = Matrix.Diagonal(Statistics.Tools.WeightedVariance(observations, weights, means));
        }
        else
        {
            cov = Statistics.Tools.WeightedCovariance(observations, weights, means);
        }
    }
    else
    {
        // Compute mean vector
        means = Statistics.Tools.Mean(observations);

        // Compute covariance matrix. The full covariance must only be used
        // when a diagonal matrix was not requested; otherwise it would
        // overwrite the diagonal estimate and the option would be ignored.
        if (opt != null && opt.Diagonal)
        {
            cov = Matrix.Diagonal(Statistics.Tools.Variance(observations, means));
        }
        else
        {
            cov = Statistics.Tools.Covariance(observations, means);
        }
    }

    CholeskyDecomposition chol = new CholeskyDecomposition(cov, false, true);

    if (opt != null)
    {
        // Parse optional estimation options
        double regularization = opt.Regularization;

        if (regularization > 0)
        {
            int dimension = observations[0].Length;

            while (!chol.PositiveDefinite)
            {
                for (int i = 0; i < dimension; i++)
                {
                    // Replace non-finite entries of the row by zero before
                    // adding the regularization constant to the diagonal.
                    for (int j = 0; j < dimension; j++)
                    {
                        if (Double.IsNaN(cov[i, j]) || Double.IsInfinity(cov[i, j]))
                        {
                            cov[i, j] = 0.0;
                        }
                    }

                    cov[i, i] += regularization;
                }

                chol = new CholeskyDecomposition(cov, false, true);
            }
        }
    }

    if (!chol.PositiveDefinite)
    {
        throw new NonPositiveDefiniteMatrixException("Covariance matrix is not positive " +
            "definite. Try specifying a regularization constant in the fitting options.");
    }

    // Become the newly fitted distribution.
    initialize(means, cov, chol);
}
/// <summary>
///   Fits every candidate distribution to the data and then ranks the fits
///   using the Kolmogorov-Smirnov, Chi-Square and Anderson-Darling tests.
/// </summary>
///
/// <param name="data">The samples the candidate distributions are fitted to and tested against.</param>
/// <param name="weights">The weight vector forwarded to each distribution's <c>Fit</c> call.</param>
///
/// <remarks>
///   Distributions whose fitting throws are marked as failed and skipped by the
///   tests. The results are stored in the test arrays, the rank properties and
///   <c>GoodnessOfFit</c>.
/// </remarks>
///
private void compute(double[] data, double[] weights)
{
    bool[] fail = new bool[Distributions.Count];

    // Step 1. Fit all candidate distributions to the data.
    for (int i = 0; i < Distributions.Count; i++)
    {
        var distribution = Distributions[i];

        IFittingOptions options = null;
        Options.TryGetValue(distribution, out options);

        try
        {
            distribution.Fit(data, weights, options);
        }
        catch
        {
            // TODO: Maybe revisit the decision to swallow exceptions here.
            fail[i] = true;
        }
    }

    // Step 2. Use statistical tests to see how well each
    //         distribution was able to model the data.
    KolmogorovSmirnov = new KolmogorovSmirnovTest[Distributions.Count];
    ChiSquare = new ChiSquareTest[Distributions.Count];
    AndersonDarling = new AndersonDarlingTest[Distributions.Count];
    DistributionNames = new string[Distributions.Count];

    double[] ks = new double[Distributions.Count];
    double[] cs = new double[Distributions.Count];
    double[] ad = new double[Distributions.Count];

    var measures = new List<GoodnessOfFit>();

    for (int i = 0; i < Distributions.Count; i++)
    {
        // Per-iteration copy of the loop counter. The delegates below must not
        // capture 'i' itself: it is shared by all iterations, so a delegate
        // still running after the loop advanced would write to the wrong slot.
        int index = i;

        ks[index] = Double.NegativeInfinity;
        cs[index] = Double.NegativeInfinity;
        ad[index] = Double.NegativeInfinity;

        var d = this.Distributions[index] as IUnivariateDistribution<double>;

        if (d == null)
        {
            continue;
        }

        this.DistributionNames[index] = GetName(d.GetType());

        if (fail[index])
        {
            continue;
        }

        // presumably a time limit in milliseconds for each test; confirm against run()
        int ms = 5000;

        run(() =>
        {
            this.KolmogorovSmirnov[index] = new KolmogorovSmirnovTest(data, d);
            ks[index] = -KolmogorovSmirnov[index].Statistic;
        }, ms);

        run(() =>
        {
            this.ChiSquare[index] = new ChiSquareTest(data, d);
            cs[index] = -ChiSquare[index].Statistic;
        }, ms);

        run(() =>
        {
            this.AndersonDarling[index] = new AndersonDarlingTest(data, d);

            // NOTE(review): unlike the two statistics above, this one is
            // stored without negation; confirm this is intended.
            ad[index] = AndersonDarling[index].Statistic;
        }, ms);

        // A NaN statistic is treated the same as a test that produced no value.
        if (Double.IsNaN(ks[index]))
        {
            ks[index] = Double.NegativeInfinity;
        }

        if (Double.IsNaN(cs[index]))
        {
            cs[index] = Double.NegativeInfinity;
        }

        if (Double.IsNaN(ad[index]))
        {
            ad[index] = Double.NegativeInfinity;
        }

        measures.Add(new GoodnessOfFit(this, index));
    }

    this.KolmogorovSmirnovRank = getRank(ks);
    this.ChiSquareRank = getRank(cs);
    this.AndersonDarlingRank = getRank(ad);

    measures.Sort();

    this.GoodnessOfFit = new GoodnessOfFitCollection(measures);
}
/// <summary>
///   Untyped entry point: fits the distribution to observations given as a
///   generic <see cref="Array"/>.
/// </summary>
///
/// <param name="observations">Either a <c>double[]</c>, which is used as is,
///   or a <c>double[][]</c>, which is first converted through
///   <c>Matrix.Concatenate</c>.</param>
/// <param name="weights">The weight vector containing the weight for each of the samples.</param>
/// <param name="options">Optional arguments which may be used during fitting,
///   such as regularization constants and additional parameters.</param>
///
/// <exception cref="ArgumentException">
///   <paramref name="observations"/> is neither a <c>double[]</c> nor a <c>double[][]</c>.
/// </exception>
///
void IDistribution.Fit(Array observations, double[] weights, IFittingOptions options)
{
    double[] samples = observations as double[];

    if (samples == null)
    {
        // Not a flat vector: the only other accepted layout is a jagged array.
        double[][] jagged = observations as double[][];

        if (jagged == null)
        {
            throw new ArgumentException("Invalid input type.", "observations");
        }

        samples = Matrix.Concatenate(jagged);
    }

    Fit(samples, weights, options);
}
/// <summary>
///   Estimates the parameters of this distribution from a set of observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight associated with each of the samples.</param>
/// <param name="options">Optional fitting arguments. They are forwarded as
///   <see cref="GeneralizedBetaOptions"/>; any other kind of options object
///   is forwarded as <c>null</c>.</param>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    GeneralizedBetaOptions betaOptions = options as GeneralizedBetaOptions;

    Fit(observations, weights, betaOptions);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight vector containing the weight for each of the
///   samples, or <c>null</c> to use the plain (unweighted) sample mean.</param>
/// <param name="options">Not used by this method.</param>
///
/// <remarks>
///   The distribution is re-initialized from the (weighted) mean of the observations.
/// </remarks>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    // Callers are allowed to omit the weights; fall back to the unweighted
    // mean instead of handing a null vector to the weighted estimator.
    double mean = (weights != null)
        ? Statistics.Tools.WeightedMean(observations, weights)
        : Statistics.Tools.Mean(observations);

    initialize(mean);
}
/// <summary>
///   Estimates the circular mean and concentration of this distribution from
///   a set of observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight for each of the samples, or <c>null</c>
///   to use the unweighted estimators.</param>
/// <param name="options">Optional <see cref="VonMisesOptions"/>. When
///   <c>UseBiasCorrection</c> is set, the estimated concentration is adjusted
///   according to the number of observations.</param>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    bool weighted = weights != null;

    double mean = weighted
        ? Circular.WeightedMean(observations, weights)
        : Circular.Mean(observations);

    double concentration = weighted
        ? Circular.WeightedConcentration(observations, weights, mean)
        : Circular.Concentration(observations, mean);

    if (options != null)
    {
        // Parse optional estimation options
        VonMisesOptions vonMisesOptions = (VonMisesOptions)options;

        if (vonMisesOptions.UseBiasCorrection)
        {
            double N = observations.Length;

            if (concentration >= 2)
            {
                double Nm1 = N - 1;
                concentration = (Nm1 * Nm1 * Nm1 * concentration) / (N * N * N + N);
            }
            else
            {
                // NOTE(review): the correction term is written as 1 / (2 N k) here;
                // confirm against the reference this formula was taken from.
                concentration = System.Math.Max(concentration - 1.0 / (2.0 * (N * concentration)), 0);
            }
        }
    }

    initialize(mean, concentration);
}
/// <summary>
///   Estimates the <c>sigma</c> parameter from a set of observations, as the
///   square root of half the mean of the squared observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">Must be <c>null</c>; weighted samples are not supported.</param>
/// <param name="options">Must be <c>null</c>; no fitting options are accepted.</param>
///
/// <exception cref="ArgumentException">
///   <paramref name="options"/> or <paramref name="weights"/> is not <c>null</c>.
/// </exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    if (options != null)
    {
        throw new ArgumentException("This method does not accept fitting options.");
    }

    if (weights != null)
    {
        throw new ArgumentException("This distribution does not support weighted samples.");
    }

    double sumOfSquares = 0;
    foreach (double value in observations)
    {
        sumOfSquares += value * value;
    }

    int count = observations.Length;

    sigma = Math.Sqrt(1.0 / (2.0 * count) * sumOfSquares);
}
/// <summary>
///   Fitting is not implemented for this distribution.
/// </summary>
///
/// <param name="observations">The observed samples (unused).</param>
/// <param name="weights">The weight for each of the samples (unused).</param>
/// <param name="options">Optional fitting arguments (unused).</param>
///
/// <exception cref="NotImplementedException">Always thrown.</exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    throw new NotImplementedException();
}
/// <summary>
///   Untyped entry point: fits the distribution to unweighted observations
///   given as a generic <see cref="Array"/>.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="options">Optional arguments which may be used during fitting,
///   such as regularization constants and additional parameters.</param>
///
/// <remarks>
///   Forwards to the weighted <see cref="IDistribution"/> overload passing
///   <c>null</c> as the weight vector.
/// </remarks>
///
void IDistribution.Fit(Array observations, IFittingOptions options)
{
    IDistribution self = this;

    self.Fit(observations, null, options);
}
/// <summary>
///   Estimates the parameters of this distribution from a set of observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight associated with each of the samples.</param>
/// <param name="options">Optional fitting arguments; they are cast to
///   <see cref="GammaOptions"/> before being forwarded.</param>
///
/// <exception cref="InvalidCastException">
///   <paramref name="options"/> is not <c>null</c> and is not a <see cref="GammaOptions"/>.
/// </exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    GammaOptions gammaOptions = (GammaOptions)options;

    Fit(observations, weights, gammaOptions);
}
/// <summary>
///   Creates a new <see cref="MixtureOptions"/> object with the given
///   convergence threshold and inner fitting options.
/// </summary>
///
/// <param name="threshold">The convergence criterion for the
///   Expectation-Maximization algorithm. Default is 1e-3.</param>
/// <param name="innerOptions">The fitting options for the inner
///   component distributions of the mixture density.</param>
///
public MixtureOptions(double threshold, IFittingOptions innerOptions)
{
    this.Threshold = threshold;
    this.InnerOptions = innerOptions;
}
/// <summary>
///   Re-estimates the joint probability table from a set of observations.
/// </summary>
///
/// <param name="observations">The observed symbol vectors. Each component is
///   truncated to an integer and combined with <c>positions</c> to locate the
///   table entry of the observation.</param>
/// <param name="weights">The weight for each of the samples, or <c>null</c>
///   to count every sample once.</param>
/// <param name="options">Must be <c>null</c>; no fitting options are accepted.</param>
///
/// <exception cref="ArgumentException"><paramref name="options"/> is not <c>null</c>.</exception>
/// <exception cref="DimensionMismatchException">
///   A weight vector was given but its length differs from the number of observations.
/// </exception>
///
public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
{
    if (options != null)
    {
        throw new ArgumentException("This method does not accept fitting options.");
    }

    // Start from an empty table.
    for (int p = 0; p < probabilities.Length; p++)
    {
        probabilities[p] = 0;
    }

    bool weighted = weights != null;

    if (weighted && observations.Length != weights.Length)
    {
        throw new DimensionMismatchException("weights", "The weight vector should have the same size as the observations");
    }

    // Accumulate the mass of each observation in its table entry.
    for (int n = 0; n < observations.Length; n++)
    {
        double[] sample = observations[n];

        int entry = 0;
        for (int d = 0; d < sample.Length; d++)
        {
            entry += (int)sample[d] * positions[d];
        }

        probabilities[entry] += weighted ? weights[n] : 1;
    }

    double total = 0;
    for (int p = 0; p < probabilities.Length; p++)
    {
        total += probabilities[p];
    }

    if (!(total == 0 || total == 1))
    {
        // TODO: add the following in a JointOption class:
        // avoid locking a parameter in zero.
        // if (num == 0) num = 1e-10;

        // assert that probabilities sum up to 1.
        for (int p = 0; p < probabilities.Length; p++)
        {
            probabilities[p] /= total;
        }
    }
}
/// <summary>
///   Default fitting implementation, which does not support fitting.
/// </summary>
///
/// <param name="observations">The observed samples (unused).</param>
/// <param name="weights">The weight for each of the samples (unused).</param>
/// <param name="options">Optional fitting arguments (unused).</param>
///
/// <exception cref="NotSupportedException">
///   Always thrown by this implementation; the method is virtual so that
///   derived classes can replace it.
/// </exception>
///
public virtual void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    throw new NotSupportedException();
}
/// <summary>
///   Builds a <see cref="MixtureOptions"/> instance, storing the convergence
///   threshold and the options used for the inner components.
/// </summary>
///
/// <param name="threshold">The convergence criterion for the
///   Expectation-Maximization algorithm. Default is 1e-3.</param>
/// <param name="innerOptions">The fitting options for the inner
///   component distributions of the mixture density.</param>
///
public MixtureOptions(double threshold, IFittingOptions innerOptions)
{
    this.Threshold = threshold;
    this.InnerOptions = innerOptions;
}
/// <summary>
///   Fits the underlying distribution to a given set of observations.
/// </summary>
///
/// <param name="observations">
///   The array of observations to fit the model against. Each element
///   is one multivariate sample, given as a <c>double[]</c>.</param>
/// <param name="weights">
///   The weight vector containing the weight for each of the samples.</param>
/// <param name="options">
///   Optional arguments which may be used during fitting, such
///   as regularization constants and additional parameters.</param>
///
/// <remarks>
///   This member is abstract: every concrete distribution has to
///   provide its own estimation procedure.
/// </remarks>
///
public abstract void Fit(double[][] observations, double[] weights, IFittingOptions options);
/// <summary>
///   Fits the mixture to a given set of observations using the
///   Expectation-Maximization algorithm.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight vector containing the weight for each of the
///   samples, or <c>null</c> to give every sample the same weight
///   (one over the number of observations).</param>
/// <param name="options">Optional <see cref="MixtureOptions"/> providing the
///   convergence threshold (default 1e-3) and the options forwarded to the
///   component distributions.</param>
///
/// <exception cref="ArgumentException">
///   A weight vector was given but its length differs from the number of observations.
/// </exception>
/// <exception cref="ConvergenceException">
///   The log-likelihood became NaN or infinite during the iterations.
/// </exception>
///
public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
{
    // Estimation parameters
    double threshold = 1e-3;
    IFittingOptions innerOptions = null;

    int N = observations.Length;
    int K = components.Length;

    if (weights == null)
    {
        // Sibling overloads forward null for "no weights": treat it as
        // uniform weights summing to one instead of dereferencing null.
        weights = new double[N];
        for (int i = 0; i < weights.Length; i++)
        {
            weights[i] = 1.0 / N;
        }
    }
    else if (weights.Length != N)
    {
        // The loops below index the per-sample buffers with the weight
        // indices, so both lengths have to agree.
        throw new ArgumentException("The weight vector should have the same size as the observations", "weights");
    }

#if DEBUG
    for (int i = 0; i < weights.Length; i++)
    {
        if (Double.IsNaN(weights[i]) || Double.IsInfinity(weights[i]))
        {
            throw new Exception("Invalid numbers in the weight vector.");
        }
    }
#endif

    if (options != null)
    {
        // Process optional arguments
        MixtureOptions o = (MixtureOptions)options;
        threshold = o.Threshold;
        innerOptions = o.InnerOptions;
    }

    // 1. Initialize means, covariances and mixing coefficients
    //    and evaluate the initial value of the log-likelihood
    double weightSum = weights.Sum();

    // Initialize responsibilities
    double[] norms = new double[N];
    double[][] gamma = new double[K][];
    for (int k = 0; k < gamma.Length; k++)
    {
        gamma[k] = new double[N];
    }

    // Clone the current distribution values, so that this instance is
    // only changed once the estimation has finished.
    double[] pi = (double[])coefficients.Clone();
    T[] pdf = new T[components.Length];
    for (int i = 0; i < components.Length; i++)
    {
        pdf[i] = (T)components[i].Clone();
    }

    // Prepare the iteration
    double likelihood = logLikelihood(pi, pdf, observations, weights);
    bool converged = false;

    // Start
    while (!converged)
    {
        // 2. Expectation: Evaluate the component distributions
        //    responsibilities using the current parameter values.
        Array.Clear(norms, 0, norms.Length);

        for (int k = 0; k < gamma.Length; k++)
        {
            for (int i = 0; i < observations.Length; i++)
            {
                norms[i] += gamma[k][i] = pi[k] * pdf[k].ProbabilityFunction(observations[i]);
            }
        }

        for (int k = 0; k < gamma.Length; k++)
        {
            for (int i = 0; i < weights.Length; i++)
            {
                if (norms[i] != 0)
                {
                    gamma[k][i] *= weights[i] / norms[i];
                }
            }
        }

        // 3. Maximization: Re-estimate the distribution parameters
        //    using the previously computed responsibilities
        for (int k = 0; k < gamma.Length; k++)
        {
            double sum = gamma[k].Sum();

            // Normalize the responsibilities before using them as weights.
            for (int i = 0; i < gamma[k].Length; i++)
            {
                gamma[k][i] /= sum;
            }

            pi[k] = sum / weightSum;
            pdf[k].Fit(observations, gamma[k], innerOptions);
        }

        // 4. Evaluate the log-likelihood and check for convergence
        double newLikelihood = logLikelihood(pi, pdf, observations, weights);

        if (Double.IsNaN(newLikelihood) || Double.IsInfinity(newLikelihood))
        {
            throw new ConvergenceException("Fitting did not converge.");
        }

        if (Math.Abs(likelihood - newLikelihood) < threshold * Math.Abs(likelihood))
        {
            converged = true;
        }

        likelihood = newLikelihood;
    }

    // Become the newly fitted distribution.
    this.initialize(pi, pdf);
}
/// <summary>
///   Creates a new <see cref="IndependentOptions"/> object from a single
///   inner fitting options object.
/// </summary>
///
/// <param name="innerOption">The fitting options for the inner component
///   distributions of the independent distributions. The object is stored
///   as given in <c>InnerOption</c>.</param>
///
public IndependentOptions(IFittingOptions innerOption)
{
    this.InnerOption = innerOption;
}
/// <summary>
///   Estimates the two shape parameters from the (weighted) mean and
///   variance of a set of observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight for each of the samples, or <c>null</c>
///   to use the unweighted mean and variance.</param>
/// <param name="options">Must be <c>null</c>; no fitting options are accepted.</param>
///
/// <exception cref="ArgumentException"><paramref name="options"/> is not <c>null</c>.</exception>
/// <exception cref="NotSupportedException">
///   The variance is greater than or equal to <c>mean * (1 - mean)</c>.
/// </exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    if (options != null)
    {
        throw new ArgumentException("This method does not accept fitting options.");
    }

    bool weighted = weights != null;

    double mean = weighted
        ? observations.WeightedMean(weights)
        : observations.Mean();

    double variance = weighted
        ? observations.WeightedVariance(weights, mean)
        : observations.Variance(mean);

    // Upper bound the variance has to stay below for the estimates to be usable.
    double bound = mean * (1.0 - mean);

    if (variance >= bound)
    {
        throw new NotSupportedException();
    }

    double u = (bound / variance) - 1.0;

    double alpha = mean * u;
    double beta = (1 - mean) * u;

    init(alpha, beta);
}
/// <summary>
///   Estimates the parameters of this distribution from a set of observations
///   with integer weights.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The integer weight associated with each of the samples.</param>
/// <param name="options">Optional fitting arguments. They are forwarded as
///   <see cref="BetaOptions"/>; any other kind of options object is
///   forwarded as <c>null</c>.</param>
///
public override void Fit(double[] observations, int[] weights, IFittingOptions options)
{
    BetaOptions betaOptions = options as BetaOptions;

    Fit(observations, weights, betaOptions);
}
/// <summary>
///   Estimates the mean vector and covariance matrix of this distribution
///   from a set of observations.
/// </summary>
///
/// <param name="observations">The observed sample vectors to fit the model against.</param>
/// <param name="weights">The weight for each of the samples, or <c>null</c>
///   to use the unweighted estimators.</param>
/// <param name="options">Optional <see cref="NormalOptions"/>. When
///   <c>Regularization</c> is positive it is added to the covariance diagonal
///   until the matrix becomes positive definite.</param>
///
/// <exception cref="NonPositiveDefiniteMatrixException">
///   The estimated covariance matrix is not positive definite.
/// </exception>
///
public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
{
    bool weighted = weights != null;

    // Mean vector (weighted when weights were given)
    double[] means = weighted
        ? Statistics.Tools.Mean(observations, weights)
        : Statistics.Tools.Mean(observations);

    // Covariance matrix (weighted when weights were given)
    double[,] cov = weighted
        ? Statistics.Tools.WeightedCovariance(observations, weights, means)
        : Statistics.Tools.Covariance(observations, means);

    CholeskyDecomposition chol = new CholeskyDecomposition(cov, false, true);

    // Parse optional estimation options; without options nothing is added.
    double regularization = 0;
    if (options != null)
    {
        NormalOptions normalOptions = (NormalOptions)options;
        regularization = normalOptions.Regularization;
    }

    if (regularization > 0)
    {
        int dimension = observations[0].Length;

        // Keep inflating the diagonal until the decomposition succeeds.
        while (!chol.PositiveDefinite)
        {
            for (int d = 0; d < dimension; d++)
            {
                cov[d, d] += regularization;
            }

            chol = new CholeskyDecomposition(cov, false, true);
        }
    }

    if (!chol.PositiveDefinite)
    {
        throw new NonPositiveDefiniteMatrixException("Covariance matrix is not positive " +
            "definite. Try specifying a regularization constant in the fitting options.");
    }

    // Become the newly fitted distribution.
    initialize(means, cov, chol);
}
/// <summary>
///   Base fitting implementation, which does not support fitting.
/// </summary>
///
/// <param name="observations">The observed samples (unused).</param>
/// <param name="weights">The weight for each of the samples (unused).</param>
/// <param name="options">Optional fitting arguments (unused).</param>
///
/// <exception cref="NotSupportedException">
///   Always thrown by this implementation; derived classes may override
///   the method to provide an estimation procedure.
/// </exception>
///
public virtual void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    throw new NotSupportedException();
}
/// <summary>
///   Estimates the mean and variance of this distribution from a set of
///   observations.
/// </summary>
///
/// <param name="observations">The observed samples to fit the model against.</param>
/// <param name="weights">The weight for each of the samples, or <c>null</c>
///   to use the unweighted mean and variance.</param>
/// <param name="options">Optional <see cref="NormalOptions"/>. Its
///   <c>Regularization</c> value replaces the variance when the estimated
///   variance is zero, NaN or infinite.</param>
///
/// <exception cref="InvalidOperationException">The distribution is marked as immutable.</exception>
/// <exception cref="ArgumentException">The resulting variance is zero or negative.</exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    if (immutable)
    {
        throw new InvalidOperationException();
    }

    bool weighted = weights != null;

#if DEBUG
    if (weighted)
    {
        foreach (double w in weights)
        {
            if (Double.IsNaN(w) || Double.IsInfinity(w))
            {
                throw new Exception("Invalid numbers in the weight vector.");
            }
        }
    }
#endif

    // Mean (weighted when weights were given)
    double mean = weighted
        ? Statistics.Tools.WeightedMean(observations, weights)
        : Statistics.Tools.Mean(observations);

    // Variance around that mean (weighted when weights were given)
    double variance = weighted
        ? Statistics.Tools.WeightedVariance(observations, weights, mean)
        : Statistics.Tools.Variance(observations, mean);

    if (options != null)
    {
        // Parse optional estimation options
        double regularization = ((NormalOptions)options).Regularization;

        bool degenerate = variance == 0
            || Double.IsNaN(variance)
            || Double.IsInfinity(variance);

        if (degenerate)
        {
            variance = regularization;
        }
    }

    // NOTE(review): a NaN variance is not caught by this comparison when
    // no options were given; confirm whether that is intended.
    if (variance <= 0)
    {
        throw new ArgumentException("Variance is zero. Try specifying " +
            "a regularization constant in the fitting options.");
    }

    initialize(mean, Math.Sqrt(variance), variance);
}
/// <summary>
///   Re-estimates the probability vector of this distribution from a set
///   of weighted observation vectors.
/// </summary>
///
/// <param name="observations">The observation vectors; element <c>c</c> of each
///   vector contributes to the estimate of component <c>c</c>.</param>
/// <param name="weights">The weight for each observation. Must not be <c>null</c>,
///   since its length is read unconditionally.</param>
/// <param name="options">Optional fitting arguments. Not used by this implementation.</param>
///
/// <remarks>
///   For each component, the weighted observations are summed, scaled by the
///   number of weights and divided by <c>N</c>. The distribution is then
///   re-initialized with <c>N</c> and the resulting vector.
/// </remarks>
///
public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
{
    int components = probabilities.Length;
    double sampleCount = weights.Length;
    double[] estimate = new double[components];

    for (int c = 0; c < components; c++)
    {
        // Accumulate the scaled, weighted total for component c.
        double total = 0;
        for (int i = 0; i < observations.Length; i++)
        {
            total += observations[i][c] * weights[i] * sampleCount;
        }

        estimate[c] = total / N;
    }

    initialize(N, estimate);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations,
///   without specifying sample weights.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="options">Optional fitting arguments, such as regularization
///   constants and additional parameters.</param>
///
/// <remarks>
///   Forwards to the weighted overload of the <c>IDistribution</c> interface,
///   passing <c>null</c> as the weight vector.
/// </remarks>
///
void IDistribution.Fit(Array observations, IFittingOptions options)
{
    // Dispatch through the interface so the explicit weighted implementation is used.
    IDistribution self = this;
    self.Fit(observations, null, options);
}
/// <summary>
///   Not supported: this distribution cannot be fitted to observations.
/// </summary>
///
/// <param name="observations">Ignored.</param>
/// <param name="weights">Ignored.</param>
/// <param name="options">Ignored.</param>
///
/// <exception cref="System.NotSupportedException">Always thrown.</exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    // Fitting is deliberately unavailable here.
    throw new System.NotSupportedException();
}
/// <summary>
///   Fits the underlying distribution to a given set of observations,
///   using integer sample weights.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="weights">The integer weight for each of the samples.</param>
/// <param name="options">Optional fitting arguments. The value is converted with
///   <c>as</c> to <c>MultivariateEmpiricalOptions</c>; an object of any other type
///   is therefore forwarded as <c>null</c>.</param>
///
public override void Fit(double[][] observations, int[] weights, IFittingOptions options)
{
    // Narrow the generic options to the type expected by the strongly-typed overload.
    MultivariateEmpiricalOptions empiricalOptions = options as MultivariateEmpiricalOptions;

    Fit(observations, weights, empiricalOptions);
}
/// <summary>
///   Re-estimates the probability table of this distribution from a set of
///   weighted observation vectors.
/// </summary>
///
/// <param name="observations">The observation vectors. Each component is truncated
///   to an integer and combined with <c>positions</c> to locate a table cell.</param>
/// <param name="weights">The weight for each observation; must have the same
///   length as <paramref name="observations"/>.</param>
/// <param name="options">Must be <c>null</c>; this method accepts no fitting options.</param>
///
/// <exception cref="ArgumentException">Options were supplied, or the weight vector
///   and the observations differ in length.</exception>
///
public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
{
    if (options != null)
    {
        throw new ArgumentException("This method does not accept fitting options.");
    }

    if (observations.Length != weights.Length)
    {
        throw new ArgumentException("The weight vector should have the same size as the observations", "weights");
    }

    // Start from an empty table.
    for (int k = 0; k < probabilities.Length; k++)
    {
        probabilities[k] = 0;
    }

    // Add each observation's weight to the cell it maps to.
    for (int n = 0; n < observations.Length; n++)
    {
        double[] sample = observations[n];

        int cell = 0;
        for (int d = 0; d < sample.Length; d++)
        {
            cell += (int)sample[d] * positions[d];
        }

        probabilities[cell] += weights[n];
    }

    double total = 0;
    for (int k = 0; k < probabilities.Length; k++)
    {
        total += probabilities[k];
    }

    // Normalize so the table sums to one, unless it is
    // empty (total of 0) or already normalized (total of 1).
    if (!(total == 0 || total == 1))
    {
        for (int k = 0; k < probabilities.Length; k++)
        {
            probabilities[k] /= total;
        }
    }
}
/// <summary>
///   Creates a new <see cref="IndependentOptions"/> object that carries a
///   single fitting-options instance.
/// </summary>
///
/// <param name="innerOption">The fitting options for the inner component
///   distributions of the independent distributions. Stored as-is in
///   <c>InnerOption</c>.</param>
///
public IndependentOptions(IFittingOptions innerOption)
{
    this.InnerOption = innerOption;
}
/// <summary>
///   Fits the underlying distribution to a given set of observations using
///   the mean and variance of the squared observations.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="weights">The weight for each of the samples, or <c>null</c>
///   to use the unweighted mean and variance.</param>
/// <param name="options">Must be <c>null</c>; this method accepts no fitting options.</param>
///
/// <remarks>
///   With <c>m</c> and <c>v</c> the mean and variance of the squared
///   observations, the shape is set to <c>m * m / v</c> and the spread to <c>m</c>.
/// </remarks>
///
/// <exception cref="ArgumentException">Options were supplied.</exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    if (options != null)
    {
        throw new ArgumentException("This method does not accept fitting options.");
    }

    // Reference: R. Kolar, R. Jirik, J. Jan (2004) "Estimator Comparison of the
    // Nakagami-m Parameter and Its Application in Echocardiography",
    // Radioengineering, 13 (1), 8-12

    double[] squared = Matrix.ElementwisePower(observations, 2);

    double secondMoment;
    double spreadOfSquares;

    if (weights != null)
    {
        secondMoment = Statistics.Tools.WeightedMean(squared, weights);
        spreadOfSquares = Statistics.Tools.WeightedVariance(squared, weights);
    }
    else
    {
        secondMoment = Statistics.Tools.Mean(squared);
        spreadOfSquares = Statistics.Tools.Variance(squared);
    }

    init((secondMoment * secondMoment) / spreadOfSquares, secondMoment);
}
/// <summary>
///   Not supported: this distribution cannot be fitted to observations.
/// </summary>
///
/// <param name="observations">Ignored.</param>
/// <param name="weights">Ignored.</param>
/// <param name="options">Ignored.</param>
///
/// <exception cref="NotSupportedException">Always thrown.</exception>
///
public override void Fit(double[][,] observations, double[] weights, IFittingOptions options)
{
    // Fitting is deliberately unavailable here.
    throw new NotSupportedException();
}
/// <summary>
///   Fits the underlying distribution to a given set of observations,
///   without specifying sample weights.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="options">Optional fitting arguments, such as regularization
///   constants and additional parameters.</param>
///
/// <remarks>
///   Delegates to the weighted overload, passing <c>null</c> as the weight vector.
/// </remarks>
///
public virtual void Fit(double[] observations, IFittingOptions options)
{
    // A null weight vector stands for "no weights supplied".
    this.Fit(observations, null, options);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations by
///   estimating its mean.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="weights">The weight for each of the samples. When <c>null</c>,
///   the plain (unweighted) mean of the observations is used.</param>
/// <param name="options">Optional fitting arguments. Not used by this implementation.</param>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    double mean;

    if (weights == null)
    {
        // The weight-less Fit overload forwards a null weight vector;
        // fall back to the unweighted mean instead of failing on it.
        mean = Statistics.Tools.Mean(observations);
    }
    else
    {
        mean = Statistics.Tools.WeightedMean(observations, weights);
    }

    initialize(mean);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations,
///   giving every observation the same weight.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="options">Optional fitting arguments, such as regularization
///   constants and additional parameters.</param>
///
/// <remarks>
///   Builds a weight vector in which every entry equals one divided by the
///   number of observations, then forwards to the weighted overload of the
///   <c>IDistribution</c> interface.
/// </remarks>
///
void IDistribution.Fit(Array observations, IFittingOptions options)
{
    int count = observations.Length;

    // Create equal weights for the observations
    double uniform = 1.0 / count;
    double[] weights = new double[count];
    for (int i = 0; i < count; i++)
    {
        weights[i] = uniform;
    }

    // Dispatch through the interface so the explicit weighted implementation is used.
    IDistribution self = this;
    self.Fit(observations, weights, options);
}
/// <summary>
///   Fits the underlying distribution to a given set of weighted observations.
/// </summary>
///
/// <param name="observations">
///   The array of observations to fit the model against.</param>
/// <param name="weights">
///   The weight vector containing the weight for each of the samples.
///   NOTE(review): whether <c>null</c> is an accepted value is decided by
///   each overriding implementation - confirm against the concrete type.</param>
/// <param name="options">
///   Optional arguments which may be used during fitting, such
///   as regularization constants and additional parameters.</param>
///
/// <remarks>
///   This member is abstract: it declares the contract only, and every
///   concrete derived type must supply the actual estimation procedure.
/// </remarks>
///
public abstract void Fit(double[] observations, double[] weights, IFittingOptions options);
/// <summary>
///   Fits the underlying distribution to a given set of observation vectors,
///   giving every observation the same weight.
/// </summary>
///
/// <param name="observations">The array of observation vectors to fit the model against.</param>
/// <param name="options">Optional fitting arguments, such as regularization
///   constants and additional parameters.</param>
///
/// <remarks>
///   Builds a weight vector in which every entry equals one divided by the
///   number of observations, then delegates to the weighted overload.
/// </remarks>
///
public virtual void Fit(double[][] observations, IFittingOptions options)
{
    int count = observations.Length;

    // Create equal weights for the observations
    double uniform = 1.0 / count;
    double[] weights = new double[count];
    for (int i = 0; i < count; i++)
    {
        weights[i] = uniform;
    }

    this.Fit(observations, weights, options);
}
/// <summary>
///   Fits the underlying distribution to a given set of weighted observations.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="weights">The weight for each of the samples.</param>
/// <param name="options">Optional fitting arguments. The value is converted with
///   <c>as</c> to <c>TriangularOptions</c>; an object of any other type is
///   therefore forwarded as <c>null</c>.</param>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    // Narrow the generic options to the type expected by the strongly-typed overload.
    TriangularOptions triangularOptions = options as TriangularOptions;

    Fit(observations, weights, triangularOptions);
}
/// <summary>
///   Fits the underlying distribution to a given set of observations by
///   estimating its mean and shape (lambda) parameters.
/// </summary>
///
/// <param name="observations">The array of observations to fit the model against.</param>
/// <param name="weights">The weight for each of the samples, or <c>null</c>
///   to treat all observations equally.</param>
/// <param name="options">Must be <c>null</c>; this method accepts no fitting options.</param>
///
/// <remarks>
///   The mean is the (weighted) sample mean. The shape is estimated as
///   <c>n / sum_i w_i * (1 / x_i - 1 / mean)</c>, where <c>n</c> is the number
///   of observations and <c>w_i</c> is 1 when no weights are given.
/// </remarks>
///
/// <exception cref="ArgumentException">Options were supplied.</exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    if (options != null)
        throw new ArgumentException("This method does not accept fitting options.");

    int n = observations.Length;
    double mean;
    double sum = 0;

    if (weights == null)
    {
        mean = observations.Mean();

        for (int i = 0; i < observations.Length; i++)
            sum += (1.0 / observations[i] - 1.0 / mean);
    }
    else
    {
        // The weighted mean must be taken with respect to the weight vector,
        // not with the observations acting as their own weights.
        mean = observations.WeightedMean(weights);

        for (int i = 0; i < observations.Length; i++)
            sum += weights[i] * (1.0 / observations[i] - 1.0 / mean);
    }

    // Maximum-likelihood shape estimate: 1 / lambda = sum / n. Dividing the int
    // n by the double sum also avoids the integer overflow of computing n * n.
    double lambda = n / sum;

    init(mean, lambda);
}
/// <summary>
///   Fitting has not been implemented for this distribution.
/// </summary>
///
/// <param name="observations">Ignored.</param>
/// <param name="weights">Ignored.</param>
/// <param name="options">Ignored.</param>
///
/// <exception cref="NotImplementedException">Always thrown.</exception>
///
public override void Fit(double[] observations, double[] weights, IFittingOptions options)
{
    // Placeholder: no estimation procedure has been written yet.
    throw new NotImplementedException();
}