/// <summary>
 ///   Estimates the distribution from the given sample by forwarding to the
 ///   overload that accepts <see cref="EmpiricalHazardOptions"/>.
 /// </summary>
 ///
 /// <param name="observations">The observed values to fit against.</param>
 /// <param name="weights">The weight associated with each observation.</param>
 /// <param name="options">Optional fitting settings. An object that is not an
 ///   <see cref="EmpiricalHazardOptions"/> instance is forwarded as <c>null</c>.</param>
 ///
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     // Soft cast: incompatible option objects become null instead of throwing.
     EmpiricalHazardOptions hazardOptions = options as EmpiricalHazardOptions;

     Fit(observations, weights, hazardOptions);
 }
        /// <summary>
        ///   Fits the underlying distribution to a given set of observations.
        /// </summary>
        /// 
        /// <param name="observations">The observed symbols. Each value is truncated to
        ///   an integer and used as an index into the probability table.</param>
        /// <param name="weights">The weight of each observation, or <c>null</c> to
        ///   give every observation the same weight (1 / number of observations).</param>
        /// <param name="options">Optional fitting arguments; not used by this method.</param>
        ///   
        /// <exception cref="ArgumentException">
        ///   <paramref name="weights"/> is given but its length differs from the
        ///   length of <paramref name="observations"/>.</exception>
        /// 
        /// <remarks>
        ///   The accumulated weight of each symbol is passed to <c>initialize</c>
        ///   together with a first argument of zero. The sibling overload without
        ///   a weight vector forwards <c>null</c> weights, which is why a missing
        ///   weight vector is accepted here.
        /// </remarks>
        /// 
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            if (weights != null && observations.Length != weights.Length)
                throw new ArgumentException("The weight vector should have the same size as the observations", "weights");

            // Without explicit weights every sample contributes the same share, so
            // the accumulated table sums to (approximately) one, like a normalized
            // weight vector would.
            double uniform = 1.0 / observations.Length;

            double[] p = new double[probabilities.Length];
            for (int i = 0; i < observations.Length; i++)
            {
                int symbol = (int)observations[i];
                p[symbol] += (weights != null) ? weights[i] : uniform;
            }

            initialize(0, p);
        }
 /// <summary>
 ///   Creates a new <see cref="IndependentOptions"/> object from the
 ///   given per-component fitting options.
 /// </summary>
 ///
 /// <param name="innerOptions">The fitting options for the inner component
 ///   distributions; assigned to <c>InnerOptions</c> as given.</param>
 ///
 public IndependentOptions(IFittingOptions[] innerOptions)
 {
     this.InnerOptions = innerOptions;
 }
 /// <summary>
 ///   Fits the distribution to a set of unweighted observations.
 /// </summary>
 ///
 /// <param name="observations">The observed values to fit against.</param>
 /// <param name="options">Optional fitting settings, forwarded unchanged.</param>
 ///
 /// <remarks>
 ///   Convenience overload: delegates to the three-argument overload
 ///   with a <c>null</c> weight vector.
 /// </remarks>
 ///
 public virtual void Fit(double[] observations, IFittingOptions options)
 {
     // No weight vector was supplied by the caller.
     double[] noWeights = null;

     Fit(observations, noWeights, options);
 }
        /// <summary>
        ///   Untyped entry point: fits the distribution to observations
        ///   supplied as a general <see cref="Array"/>.
        /// </summary>
        /// <param name="observations">
        ///   Either a <c>double[][]</c>, which is used directly, or a
        ///   <c>double[]</c>, which is first split using <c>dimension</c>.</param>
        /// <param name="weights">
        ///   The weight of each sample, forwarded unchanged.</param>
        /// <param name="options">
        ///   Optional fitting settings, forwarded unchanged.</param>
        /// <exception cref="ArgumentException">
        ///   <paramref name="observations"/> is neither a <c>double[][]</c>
        ///   nor a <c>double[]</c>.</exception>
        /// 
        void IDistribution.Fit(Array observations, double[] weights, IFittingOptions options)
        {
            double[][] vectors = observations as double[][];
            double[] flat = observations as double[];

            if (vectors != null)
            {
                // Already in the jagged layout the typed overload expects.
                Fit(vectors, weights, options);
            }
            else if (flat != null)
            {
                // Flat input has to be split into vectors first.
                Fit(flat.Split(dimension), weights, options);
            }
            else
            {
                throw new ArgumentException("Unsupported parameter type.", "observations");
            }
        }
Beispiel #6
0
        /// <summary>
        ///   Fits the underlying distribution to a given set of observations.
        /// </summary>
        ///
        /// <param name="observations">The sample vectors to fit the model against,
        ///   one <c>double[]</c> per observation.</param>
        /// <param name="weights">The weight of each sample, or <c>null</c> to use the
        ///   unweighted mean and covariance estimates. In DEBUG builds the weights
        ///   are checked to be finite and to sum to one.</param>
        /// <param name="options">Optional fitting arguments. When this is a
        ///   <c>NormalOptions</c> instance, its <c>Diagonal</c> flag selects a diagonal
        ///   covariance matrix and a positive <c>Regularization</c> value is added to
        ///   the covariance diagonal until the matrix is positive definite.</param>
        ///
        /// <exception cref="NonPositiveDefiniteMatrixException">
        ///   The estimated covariance matrix is not positive definite and no
        ///   positive regularization constant was supplied.</exception>
        ///
        public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
        {
            double[] means;
            double[,] cov;

            NormalOptions opt = options as NormalOptions;


            if (weights != null)
            {
#if DEBUG
                double sum = 0;
                for (int i = 0; i < weights.Length; i++)
                {
                    if (Double.IsNaN(weights[i]) || Double.IsInfinity(weights[i]))
                    {
                        throw new Exception("Invalid numbers in the weight vector.");
                    }
                    sum += weights[i];
                }

                if (Math.Abs(sum - 1.0) > 1e-10)
                {
                    throw new Exception("Weights do not sum to one.");
                }
#endif
                // Compute weighted mean vector
                means = Statistics.Tools.Mean(observations, weights);

                // Compute weighted covariance matrix
                if (opt != null && opt.Diagonal)
                {
                    cov = Matrix.Diagonal(Statistics.Tools.WeightedVariance(observations, weights, means));
                }
                else
                {
                    cov = Statistics.Tools.WeightedCovariance(observations, weights, means);
                }
            }
            else
            {
                // Compute mean vector
                means = Statistics.Tools.Mean(observations);

                // Compute covariance matrix. The full covariance belongs in an
                // else branch: computing it unconditionally would discard the
                // diagonal estimate requested through the options.
                if (opt != null && opt.Diagonal)
                {
                    cov = Matrix.Diagonal(Statistics.Tools.Variance(observations, means));
                }
                else
                {
                    cov = Statistics.Tools.Covariance(observations, means);
                }
            }

            CholeskyDecomposition chol = new CholeskyDecomposition(cov, false, true);

            if (opt != null)
            {
                // Parse optional estimation options
                double regularization = opt.Regularization;

                if (regularization > 0)
                {
                    int dimension = observations[0].Length;

                    // Keep inflating the diagonal until the decomposition
                    // reports a positive definite matrix.
                    while (!chol.PositiveDefinite)
                    {
                        for (int i = 0; i < dimension; i++)
                        {
                            for (int j = 0; j < dimension; j++)
                            {
                                // Non-finite entries are reset to zero before
                                // the diagonal is regularized.
                                if (Double.IsNaN(cov[i, j]) || Double.IsInfinity(cov[i, j]))
                                {
                                    cov[i, j] = 0.0;
                                }
                            }

                            cov[i, i] += regularization;
                        }

                        chol = new CholeskyDecomposition(cov, false, true);
                    }
                }
            }

            if (!chol.PositiveDefinite)
            {
                throw new NonPositiveDefiniteMatrixException("Covariance matrix is not positive "
                                                             + "definite. Try specifying a regularization constant in the fitting options.");
            }

            // Become the newly fitted distribution.
            initialize(means, cov, chol);
        }
Beispiel #7
0
        /// <summary>
        ///   Fits every candidate distribution to the data and evaluates how well
        ///   each one models it.
        /// </summary>
        ///
        /// <param name="data">The sample passed to each distribution's <c>Fit</c>
        ///   method and to each goodness-of-fit test.</param>
        /// <param name="weights">The weight vector passed to each distribution's
        ///   <c>Fit</c> method; the tests do not receive it.</param>
        ///
        /// <remarks>
        ///   Distributions whose fit throws are marked as failed and skipped by the
        ///   tests. The per-test arrays, the rank arrays and the sorted
        ///   <c>GoodnessOfFit</c> collection are overwritten on every call.
        /// </remarks>
        ///
        private void compute(double[] data, double[] weights)
        {
            bool[] fail = new bool[Distributions.Count];

            // Step 1. Fit all candidate distributions to the data.
            for (int i = 0; i < Distributions.Count; i++)
            {
                var distribution = Distributions[i];

                IFittingOptions options = null;
                Options.TryGetValue(distribution, out options);

                try
                {
                    distribution.Fit(data, weights, options);
                }
                catch
                {
                    // Best effort: a candidate that cannot be fitted is only
                    // excluded from the tests below.
                    // TODO: Maybe revisit the decision to swallow exceptions here.
                    fail[i] = true;
                }
            }

            // Step 2. Use statistical tests to see how well each
            //         distribution was able to model the data.

            KolmogorovSmirnov = new KolmogorovSmirnovTest[Distributions.Count];
            ChiSquare         = new ChiSquareTest[Distributions.Count];
            AndersonDarling   = new AndersonDarlingTest[Distributions.Count];
            DistributionNames = new string[Distributions.Count];

            double[] ks = new double[Distributions.Count];
            double[] cs = new double[Distributions.Count];
            double[] ad = new double[Distributions.Count];

            var measures = new List <GoodnessOfFit>();

            for (int i = 0; i < Distributions.Count; i++)
            {
                // Per-iteration copy for the lambdas below. The 'for' variable is
                // shared by all iterations, and 'run' is given a time limit, so a
                // delegate may presumably still be executing after the loop has
                // moved on; capturing 'i' directly would then write to another slot.
                int index = i;

                ks[index] = Double.NegativeInfinity;
                cs[index] = Double.NegativeInfinity;
                ad[index] = Double.NegativeInfinity;

                var d = this.Distributions[index] as IUnivariateDistribution <double>;

                if (d == null)
                {
                    continue;
                }

                this.DistributionNames[index] = GetName(d.GetType());

                if (fail[index])
                {
                    continue;
                }

                int ms = 5000;

                run(() =>
                {
                    this.KolmogorovSmirnov[index] = new KolmogorovSmirnovTest(data, d);
                    ks[index] = -KolmogorovSmirnov[index].Statistic;
                }, ms);

                run(() =>
                {
                    this.ChiSquare[index] = new ChiSquareTest(data, d);
                    cs[index]             = -ChiSquare[index].Statistic;
                }, ms);

                // NOTE(review): unlike the two statistics above, this one is stored
                // without negation - confirm this is what getRank expects.
                run(() =>
                {
                    this.AndersonDarling[index] = new AndersonDarlingTest(data, d);
                    ad[index] = AndersonDarling[index].Statistic;
                }, ms);

                // NaN scores are replaced by negative infinity before ranking.
                if (Double.IsNaN(ks[index]))
                {
                    ks[index] = Double.NegativeInfinity;
                }

                if (Double.IsNaN(cs[index]))
                {
                    cs[index] = Double.NegativeInfinity;
                }

                if (Double.IsNaN(ad[index]))
                {
                    ad[index] = Double.NegativeInfinity;
                }

                measures.Add(new GoodnessOfFit(this, index));
            }

            this.KolmogorovSmirnovRank = getRank(ks);
            this.ChiSquareRank         = getRank(cs);
            this.AndersonDarlingRank   = getRank(ad);

            measures.Sort();

            this.GoodnessOfFit = new GoodnessOfFitCollection(measures);
        }
        /// <summary>
        ///   Untyped entry point: fits the distribution to observations
        ///   supplied as a general <see cref="Array"/>.
        /// </summary>
        /// 
        /// <param name="observations">
        ///   Either a <c>double[]</c>, which is used directly, or a
        ///   <c>double[][]</c>, which is first passed through
        ///   <c>Matrix.Concatenate</c>.</param>
        /// <param name="weights">
        ///   The weight of each sample, forwarded unchanged.</param>
        /// <param name="options">
        ///   Optional fitting settings, forwarded unchanged.</param>
        ///   
        /// <exception cref="ArgumentException">
        ///   <paramref name="observations"/> is neither a <c>double[]</c>
        ///   nor a <c>double[][]</c>.</exception>
        /// 
        void IDistribution.Fit(Array observations, double[] weights, IFittingOptions options)
        {
            double[] flat = observations as double[];
            double[][] jagged = observations as double[][];

            if (flat != null)
            {
                // Already in the layout the typed overload expects.
                Fit(flat, weights, options);
            }
            else if (jagged != null)
            {
                // Jagged input is concatenated before fitting.
                Fit(Matrix.Concatenate(jagged), weights, options);
            }
            else
            {
                throw new ArgumentException("Invalid input type.", "observations");
            }
        }
Beispiel #9
0
 /// <summary>
 ///   Estimates the distribution from the given sample by forwarding to the
 ///   overload that accepts <see cref="GeneralizedBetaOptions"/>.
 /// </summary>
 ///
 /// <param name="observations">The observed values to fit against.</param>
 /// <param name="weights">The weight associated with each observation.</param>
 /// <param name="options">Optional fitting settings. An object that is not a
 ///   <see cref="GeneralizedBetaOptions"/> instance is forwarded as <c>null</c>.</param>
 ///
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     // Soft cast: incompatible option objects become null instead of throwing.
     GeneralizedBetaOptions betaOptions = options as GeneralizedBetaOptions;

     Fit(observations, weights, betaOptions);
 }
 /// <summary>
 ///   Fits the underlying distribution to a given set of observations.
 /// </summary>
 /// 
 /// <param name="observations">The observed values to fit against.</param>
 /// <param name="weights">The weight of each observation, or <c>null</c>
 ///   to weight all observations equally.</param>
 /// <param name="options">Optional fitting arguments; not used by this method.</param>
 ///   
 /// <remarks>
 ///   The distribution is re-initialized from the mean of the observations:
 ///   the weighted mean when weights are given, the plain arithmetic mean
 ///   otherwise. The overload without a weight vector forwards <c>null</c>
 ///   weights, which is why a missing weight vector is handled here.
 /// </remarks>
 /// 
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     double mean;

     if (weights != null)
     {
         mean = Statistics.Tools.WeightedMean(observations, weights);
     }
     else
     {
         // Unweighted case: plain average of the observations.
         double total = 0;
         for (int i = 0; i < observations.Length; i++)
         {
             total += observations[i];
         }

         mean = total / observations.Length;
     }

     initialize(mean);
 }
        /// <summary>
        ///   Estimates the mean and concentration from the observations and
        ///   re-initializes the distribution with them.
        /// </summary>
        /// 
        /// <param name="observations">The observed values to fit against.</param>
        /// <param name="weights">The weight of each observation, or <c>null</c>
        ///   to use the unweighted circular estimators.</param>
        /// <param name="options">Optional settings. When not <c>null</c>, it is cast to
        ///   <c>VonMisesOptions</c> and its <c>UseBiasCorrection</c> flag decides
        ///   whether the concentration estimate is corrected.</param>
        /// 
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            bool weighted = weights != null;

            // The mean is estimated first; the concentration estimate depends on it.
            double mean = weighted
                ? Circular.WeightedMean(observations, weights)
                : Circular.Mean(observations);

            double concentration = weighted
                ? Circular.WeightedConcentration(observations, weights, mean)
                : Circular.Concentration(observations, mean);

            // Parse optional estimation options
            if (options != null && ((VonMisesOptions)options).UseBiasCorrection)
            {
                double count = observations.Length;

                if (concentration < 2)
                {
                    // Small concentrations: shrink the estimate, clamped at zero.
                    double shrink = 1.0 / (2.0 * (count * concentration));
                    concentration = System.Math.Max(concentration - shrink, 0);
                }
                else
                {
                    double countMinusOne = count - 1;
                    concentration = (countMinusOne * countMinusOne * countMinusOne * concentration)
                                    / (count * count * count + count);
                }
            }

            initialize(mean, concentration);
        }
Beispiel #12
0
        /// <summary>
        ///   Estimates <c>sigma</c> from the observations as the square root of
        ///   the sum of squared observations divided by twice the sample count.
        /// </summary>
        /// 
        /// <param name="observations">The observed values to fit against.</param>
        /// <param name="weights">Must be <c>null</c>; weighted samples are not supported.</param>
        /// <param name="options">Must be <c>null</c>; no fitting options are accepted.</param>
        ///   
        /// <exception cref="ArgumentException">
        ///   <paramref name="options"/> or <paramref name="weights"/> is not <c>null</c>.</exception>
        /// 
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            if (options != null)
            {
                throw new ArgumentException("This method does not accept fitting options.");
            }

            if (weights != null)
            {
                throw new ArgumentException("This distribution does not support weighted samples.");
            }

            // Accumulate the sum of squares of the sample.
            double squares = 0;
            foreach (double value in observations)
            {
                squares += value * value;
            }

            sigma = Math.Sqrt(1.0 / (2.0 * observations.Length) * squares);
        }
Beispiel #13
0
 /// <summary>
 ///   Fitting entry point for weighted univariate observations. Not
 ///   implemented for this distribution: the method always throws.
 /// </summary>
 ///
 /// <param name="observations">The observed values; not used.</param>
 /// <param name="weights">The weight of each observation; not used.</param>
 /// <param name="options">Optional fitting arguments; not used.</param>
 ///
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 ///
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     throw new NotImplementedException();
 }
 /// <summary>
 ///   Untyped entry point for fitting unweighted observations.
 /// </summary>
 ///
 /// <param name="observations">The observations, forwarded unchanged.</param>
 /// <param name="options">Optional fitting settings, forwarded unchanged.</param>
 ///
 /// <remarks>
 ///   Delegates to the weighted <see cref="IDistribution"/> overload of this
 ///   instance, passing <c>null</c> as the weight vector.
 /// </remarks>
 ///
 void IDistribution.Fit(Array observations, IFittingOptions options)
 {
     // Explicit interface members are only reachable through the interface type.
     IDistribution self = this;

     self.Fit(observations, null, options);
 }
Beispiel #15
0
 /// <summary>
 ///   Estimates the distribution from the given sample by forwarding to the
 ///   overload that accepts <see cref="GammaOptions"/>.
 /// </summary>
 ///
 /// <param name="observations">The observed values to fit against.</param>
 /// <param name="weights">The weight associated with each observation.</param>
 /// <param name="options">Optional fitting settings; must be <c>null</c> or a
 ///   <see cref="GammaOptions"/> instance, since a hard cast is applied.</param>
 ///
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     // Hard cast: an incompatible options object raises an InvalidCastException.
     GammaOptions gammaOptions = (GammaOptions)options;

     Fit(observations, weights, gammaOptions);
 }
 /// <summary>
 ///   Creates a new <see cref="MixtureOptions"/> object with the given
 ///   convergence threshold and inner fitting options.
 /// </summary>
 ///
 /// <param name="threshold">The convergence criterion for the
 ///   Expectation-Maximization algorithm; assigned to <c>Threshold</c>.</param>
 /// <param name="innerOptions">The fitting options for the inner component
 ///   distributions of the mixture; assigned to <c>InnerOptions</c>.</param>
 ///
 public MixtureOptions(double threshold, IFittingOptions innerOptions)
 {
     this.Threshold = threshold;
     this.InnerOptions = innerOptions;
 }
Beispiel #17
0
        /// <summary>
        ///   Re-estimates the probability table from the observations by
        ///   accumulating the weight (or count) of every observed cell and
        ///   then scaling the table so that it sums to one.
        /// </summary>
        ///
        /// <param name="observations">The observed vectors. Each component is truncated
        ///   to an integer and multiplied by the matching entry of <c>positions</c>;
        ///   the sum of those products is the index of the table cell.</param>
        /// <param name="weights">The weight of each observation, or <c>null</c>
        ///   to count every observation once.</param>
        /// <param name="options">Must be <c>null</c>; no fitting options are accepted.</param>
        ///
        /// <exception cref="ArgumentException">
        ///   <paramref name="options"/> is not <c>null</c>.</exception>
        /// <exception cref="DimensionMismatchException">
        ///   <paramref name="weights"/> is given but its length differs from the
        ///   number of observations.</exception>
        ///
        public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
        {
            if (options != null)
            {
                throw new ArgumentException("This method does not accept fitting options.");
            }

            // Start from an empty table.
            for (int cell = 0; cell < probabilities.Length; cell++)
            {
                probabilities[cell] = 0;
            }

            bool weighted = weights != null;

            if (weighted && observations.Length != weights.Length)
            {
                throw new DimensionMismatchException("weights",
                                                     "The weight vector should have the same size as the observations");
            }

            for (int n = 0; n < observations.Length; n++)
            {
                double[] sample = observations[n];

                // Map the sample to its cell in the flattened table.
                int cell = 0;
                for (int d = 0; d < sample.Length; d++)
                {
                    cell += (int)sample[d] * positions[d];
                }

                // Weighted samples add their weight, unweighted ones count once.
                probabilities[cell] += weighted ? weights[n] : 1;
            }

            double total = 0;
            foreach (double mass in probabilities)
            {
                total += mass;
            }

            // Nothing to rescale when the table is empty or already sums to one.
            if (total == 0 || total == 1)
            {
                return;
            }

            // TODO: add the following in a JointOption class:
            // avoid locking a parameter in zero.
            // if (num == 0) num = 1e-10;

            // assert that probabilities sum up to 1.
            for (int cell = 0; cell < probabilities.Length; cell++)
            {
                probabilities[cell] /= total;
            }
        }
 /// <summary>
 ///   Fitting entry point for weighted univariate observations. This base
 ///   implementation does not support fitting and always throws; being
 ///   virtual, it can be overridden by derived types.
 /// </summary>
 /// 
 /// <param name="observations">The observed values; not used here.</param>
 /// <param name="weights">The weight of each observation; not used here.</param>
 /// <param name="options">Optional fitting arguments; not used here.</param>
 ///   
 /// <exception cref="NotSupportedException">Always thrown by this implementation.</exception>
 /// 
 public virtual void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     throw new NotSupportedException();
 }
Beispiel #19
0
 /// <summary>
 ///   Builds a <see cref="MixtureOptions"/> instance from a convergence
 ///   threshold and the options used for the inner components.
 /// </summary>
 ///
 /// <param name="threshold">The convergence criterion for the
 ///   Expectation-Maximization algorithm; stored in <c>Threshold</c>.</param>
 /// <param name="innerOptions">The fitting options for the inner component
 ///   distributions of the mixture density; stored in <c>InnerOptions</c>.</param>
 ///
 public MixtureOptions(double threshold, IFittingOptions innerOptions)
 {
     this.Threshold = threshold;
     this.InnerOptions = innerOptions;
 }
 /// <summary>
 ///   Fits the underlying distribution to a given set of weighted,
 ///   multivariate observations. Abstract: derived types supply the
 ///   actual estimation procedure.
 /// </summary>
 /// 
 /// <param name="observations">
 ///   The observations to fit the model against, one <c>double[]</c>
 ///   per sample.</param>
 /// <param name="weights">
 ///   The weight vector containing the weight for each of the samples.</param>
 /// <param name="options">
 ///   Optional arguments which may be used during fitting, such
 ///   as regularization constants and additional parameters.</param>
 /// 
 public abstract void Fit(double[][] observations, double[] weights, IFittingOptions options);
Beispiel #21
0
        /// <summary>
        ///   Re-estimates the mixture coefficients and the component distributions
        ///   from a set of weighted observations, iterating expectation and
        ///   maximization steps until the log-likelihood stabilizes.
        /// </summary>
        ///
        /// <param name="observations">The observations to fit the model against,
        ///   one <c>double[]</c> vector per sample.</param>
        /// <param name="weights">The weight associated with each observation. The vector
        ///   is dereferenced unconditionally, so it must not be null.</param>
        /// <param name="options">Optional fitting arguments. When not null, the value is cast to
        ///   <c>MixtureOptions</c>; its <c>Threshold</c> replaces the default relative tolerance
        ///   of 1e-3 and its <c>InnerOptions</c> are handed to every component's own Fit call.</param>
        ///
        /// <remarks>
        ///   The loop stops when the absolute change in log-likelihood falls below the
        ///   threshold times the absolute value of the previous log-likelihood. There is
        ///   no iteration limit in this method. The fitted values only replace the current
        ///   ones (through <c>initialize</c>) after the loop has finished.
        /// </remarks>
        ///
        /// <exception cref="ConvergenceException">The log-likelihood became NaN or infinite.</exception>
        ///
        public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
        {
            // Defaults used when the caller passes no options
            double          tolerance        = 1e-3;
            IFittingOptions componentOptions = null;

#if DEBUG
            // Debug builds reject weight vectors containing NaN or infinite entries
            foreach (double w in weights)
            {
                if (Double.IsNaN(w) || Double.IsInfinity(w))
                    throw new Exception("Invalid numbers in the weight vector.");
            }
#endif

            if (options != null)
            {
                MixtureOptions mixtureOptions = (MixtureOptions)options;
                tolerance        = mixtureOptions.Threshold;
                componentOptions = mixtureOptions.InnerOptions;
            }

            // Step 1: set up the working copies and the initial log-likelihood
            int sampleCount    = observations.Length;
            int componentCount = components.Length;

            double totalWeight = weights.Sum();

            // responsibilities[k][i]: contribution of component k to sample i
            double[]   normalizers      = new double[sampleCount];
            double[][] responsibilities = new double[componentCount][];
            for (int k = 0; k < componentCount; k++)
                responsibilities[k] = new double[sampleCount];

            // Work on clones so the current state is untouched until the end
            double[] mixing    = (double[])coefficients.Clone();
            T[]      densities = new T[componentCount];
            for (int k = 0; k < componentCount; k++)
                densities[k] = (T)components[k].Clone();

            double previous = logLikelihood(mixing, densities, observations, weights);

            while (true)
            {
                // Step 2 (expectation): unnormalized responsibilities and
                // their per-sample totals under the current parameters.
                Array.Clear(normalizers, 0, normalizers.Length);

                for (int k = 0; k < componentCount; k++)
                {
                    double[] row = responsibilities[k];

                    for (int i = 0; i < observations.Length; i++)
                    {
                        row[i] = mixing[k] * densities[k].ProbabilityFunction(observations[i]);
                        normalizers[i] += row[i];
                    }
                }

                // Normalize per sample and apply the sample weight; samples
                // whose total is exactly zero are left as they are.
                for (int k = 0; k < componentCount; k++)
                {
                    double[] row = responsibilities[k];

                    for (int i = 0; i < weights.Length; i++)
                    {
                        if (normalizers[i] == 0)
                            continue;

                        row[i] *= weights[i] / normalizers[i];
                    }
                }

                // Step 3 (maximization): refit each component using its
                // responsibilities, rescaled to sum to one, as weights.
                for (int k = 0; k < componentCount; k++)
                {
                    double[] row  = responsibilities[k];
                    double   mass = row.Sum();

                    for (int i = 0; i < row.Length; i++)
                        row[i] /= mass;

                    mixing[k] = mass / totalWeight;
                    densities[k].Fit(observations, row, componentOptions);
                }

                // Step 4: evaluate the new log-likelihood and test for convergence
                double current = logLikelihood(mixing, densities, observations, weights);

                if (Double.IsNaN(current) || Double.IsInfinity(current))
                    throw new ConvergenceException("Fitting did not converge.");

                bool stable = Math.Abs(previous - current) < tolerance * Math.Abs(previous);
                previous = current;

                if (stable)
                    break;
            }

            // Adopt the fitted coefficients and components.
            this.initialize(mixing, densities);
        }
 /// <summary>
 ///   Creates a new <see cref="IndependentOptions"/> object holding
 ///   the given inner fitting options.
 /// </summary>
 ///
 /// <param name="innerOption">The fitting options to store in <c>InnerOption</c>,
 ///   intended for the inner component distributions of the independent
 ///   distributions.</param>
 ///
 public IndependentOptions(IFittingOptions innerOption)
 {
     this.InnerOption = innerOption;
 }
        /// <summary>
        ///   Fits the distribution's two parameters to univariate observations
        ///   by matching the sample mean and variance (moment estimates).
        /// </summary>
        ///
        /// <param name="observations">The univariate observations to fit the model against.</param>
        /// <param name="weights">The weight of each observation, or null to use the
        ///   unweighted mean and variance.</param>
        /// <param name="options">Must be null; this method accepts no fitting options.</param>
        ///
        /// <exception cref="ArgumentException"><paramref name="options"/> is not null.</exception>
        /// <exception cref="NotSupportedException">The variance is greater than or equal
        ///   to <c>mean * (1 - mean)</c>, for which the estimates below are not positive.</exception>
        ///
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            if (options != null)
                throw new ArgumentException("This method does not accept fitting options.");

            bool weighted = weights != null;

            double sampleMean = weighted
                ? observations.WeightedMean(weights)
                : observations.Mean();

            double sampleVariance = weighted
                ? observations.WeightedVariance(weights, sampleMean)
                : observations.Variance(sampleMean);

            // mean * (1 - mean) is the upper limit the variance must stay below
            double limit = sampleMean * (1.0 - sampleMean);

            if (sampleVariance >= limit)
                throw new NotSupportedException();

            // Common factor shared by both parameter estimates
            double factor = (limit / sampleVariance) - 1.0;

            init(sampleMean * factor, (1 - sampleMean) * factor);
        }
Beispiel #24
0
 /// <summary>
 ///   Fits the underlying distribution to a set of observations with
 ///   integer weights, forwarding to the overload that takes
 ///   <c>BetaOptions</c>.
 /// </summary>
 ///
 /// <param name="observations">The univariate observations to fit the model against.</param>
 /// <param name="weights">The integer weight for each of the samples.</param>
 /// <param name="options">Optional fitting arguments. The value is converted with
 ///   <c>as BetaOptions</c>, so an object of any other type is forwarded as null.</param>
 ///
 public override void Fit(double[] observations, int[] weights, IFittingOptions options)
 {
     BetaOptions betaOptions = options as BetaOptions;

     Fit(observations, weights, betaOptions);
 }
        /// <summary>
        ///   Fits the distribution to multivariate observations by estimating
        ///   a mean vector and a covariance matrix, optionally regularizing the
        ///   covariance until its Cholesky decomposition is positive definite.
        /// </summary>
        ///
        /// <param name="observations">The observations to fit the model against,
        ///   one <c>double[]</c> vector per sample.</param>
        /// <param name="weights">The weight of each observation, or null to use the
        ///   unweighted mean and covariance.</param>
        /// <param name="options">Optional fitting arguments. When not null, the value is cast
        ///   to <c>NormalOptions</c>; a positive <c>Regularization</c> is added to the
        ///   covariance diagonal repeatedly until the matrix is positive definite.</param>
        ///
        /// <exception cref="NonPositiveDefiniteMatrixException">The covariance matrix is not
        ///   positive definite and no positive regularization constant was given.</exception>
        ///
        public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
        {
            double[] meanVector;
            double[,] covariance;

            if (weights == null)
            {
                // Plain mean vector and covariance matrix
                meanVector = Statistics.Tools.Mean(observations);
                covariance = Statistics.Tools.Covariance(observations, meanVector);
            }
            else
            {
                // Weighted mean vector and weighted covariance matrix
                meanVector = Statistics.Tools.Mean(observations, weights);
                covariance = Statistics.Tools.WeightedCovariance(observations, weights, meanVector);
            }

            CholeskyDecomposition decomposition = new CholeskyDecomposition(covariance, false, true);

            if (options != null)
            {
                NormalOptions normalOptions = (NormalOptions)options;
                double ridge = normalOptions.Regularization;

                if (ridge > 0)
                {
                    int dimension = observations[0].Length;

                    // Keep inflating the diagonal until the matrix decomposes
                    while (!decomposition.PositiveDefinite)
                    {
                        for (int d = 0; d < dimension; d++)
                            covariance[d, d] += ridge;

                        decomposition = new CholeskyDecomposition(covariance, false, true);
                    }
                }
            }

            if (!decomposition.PositiveDefinite)
            {
                throw new NonPositiveDefiniteMatrixException("Covariance matrix is not positive "
                    + "definite. Try specifying a regularization constant in the fitting options.");
            }

            // Adopt the fitted parameters.
            initialize(meanVector, covariance, decomposition);
        }
 /// <summary>
 ///   Fits the underlying distribution to a given set of observations.
 ///   This base implementation does not perform any fitting: it always
 ///   throws <see cref="NotSupportedException"/>. Being virtual, it can
 ///   be overridden by derived classes.
 /// </summary>
 ///
 /// <param name="observations">
 ///   The array of univariate observations to fit the model against.
 ///   Not used by this implementation.
 /// </param>
 /// <param name="weights">
 ///   The weight vector containing the weight for each of the samples.
 ///   Not used by this implementation.</param>
 /// <param name="options">
 ///   Optional arguments which may be used during fitting, such
 ///   as regularization constants and additional parameters.
 ///   Not used by this implementation.</param>
 ///
 /// <exception cref="NotSupportedException">Always thrown by this implementation.</exception>
 ///
 public virtual void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     throw new NotSupportedException();
 }
        /// <summary>
        ///   Fits the distribution to univariate observations by estimating
        ///   their mean and variance, optionally replacing an unusable variance
        ///   with a regularization constant.
        /// </summary>
        ///
        /// <param name="observations">The univariate observations to fit the model against.</param>
        /// <param name="weights">The weight of each observation, or null to use the
        ///   unweighted mean and variance.</param>
        /// <param name="options">Optional fitting arguments. When not null, the value is cast
        ///   to <c>NormalOptions</c>; its <c>Regularization</c> is used as the variance when the
        ///   estimated variance is zero, NaN or infinite.</param>
        ///
        /// <exception cref="InvalidOperationException">The instance is flagged as immutable.</exception>
        /// <exception cref="ArgumentException">The resulting variance is zero or negative.</exception>
        ///
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            if (immutable) throw new InvalidOperationException();

            double mean;
            double variance;

            if (weights == null)
            {
                // Unweighted mean and variance
                mean = Statistics.Tools.Mean(observations);
                variance = Statistics.Tools.Variance(observations, mean);
            }
            else
            {
#if DEBUG
                // Debug builds reject weight vectors containing NaN or infinite entries
                foreach (double w in weights)
                {
                    if (Double.IsNaN(w) || Double.IsInfinity(w))
                        throw new Exception("Invalid numbers in the weight vector.");
                }
#endif

                // Weighted mean and variance
                mean = Statistics.Tools.WeightedMean(observations, weights);
                variance = Statistics.Tools.WeightedVariance(observations, weights, mean);
            }

            if (options != null)
            {
                NormalOptions normalOptions = (NormalOptions)options;
                double fallback = normalOptions.Regularization;

                bool unusable = variance == 0
                    || Double.IsNaN(variance)
                    || Double.IsInfinity(variance);

                if (unusable)
                    variance = fallback;
            }

            if (variance <= 0)
            {
                throw new ArgumentException("Variance is zero. Try specifying "
                    + "a regularization constant in the fitting options.");
            }

            // Adopt the fitted mean, standard deviation and variance.
            initialize(mean, Math.Sqrt(variance), variance);
        }
        /// <summary>
        ///   Re-estimates the per-category probabilities from weighted
        ///   observations, keeping the current <c>N</c>.
        /// </summary>
        ///
        /// <param name="observations">The observations to fit the model against; entry
        ///   <c>observations[i][c]</c> is read for every category index <c>c</c>.</param>
        /// <param name="weights">The weight of each observation. The vector is dereferenced
        ///   unconditionally, so it must not be null.</param>
        /// <param name="options">Not used by this method.</param>
        ///
        /// <remarks>
        ///   For each category, the estimate is the sum over all observations of
        ///   <c>observations[i][c] * weights[i] * weights.Length</c>, divided by <c>N</c>.
        /// </remarks>
        ///
        public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
        {
            int categories = probabilities.Length;
            double[] estimates = new double[categories];
            double scale = weights.Length;

            for (int c = 0; c < categories; c++)
            {
                double total = 0;

                for (int i = 0; i < observations.Length; i++)
                    total += observations[i][c] * weights[i] * scale;

                estimates[c] = total / N;
            }

            initialize(N, estimates);
        }
 /// <summary>
 ///   Fits the underlying distribution to a given set of observations
 ///   without sample weights, by forwarding to the weighted
 ///   <c>IDistribution.Fit</c> overload with a null weight vector.
 /// </summary>
 ///
 /// <param name="observations">
 ///   The array of observations to fit the model against.</param>
 /// <param name="options">
 ///   Optional arguments which may be used during fitting; forwarded unchanged.</param>
 ///
 void IDistribution.Fit(Array observations, IFittingOptions options)
 {
     IDistribution distribution = this;

     distribution.Fit(observations, null, options);
 }
 /// <summary>
 ///   This method is not supported. It always throws
 ///   <see cref="System.NotSupportedException"/>.
 /// </summary>
 /// 
 /// <param name="observations">Not used.</param>
 /// <param name="weights">Not used.</param>
 /// <param name="options">Not used.</param>
 /// 
 /// <exception cref="System.NotSupportedException">Always thrown.</exception>
 /// 
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     throw new System.NotSupportedException();
 }
Beispiel #31
0
 /// <summary>
 ///   Fits the underlying distribution to a set of multivariate observations
 ///   with integer weights, forwarding to the overload that takes
 ///   <c>MultivariateEmpiricalOptions</c>.
 /// </summary>
 ///
 /// <param name="observations">The observations to fit the model against,
 ///   one <c>double[]</c> vector per sample.</param>
 /// <param name="weights">The integer weight for each of the samples.</param>
 /// <param name="options">Optional fitting arguments. The value is converted with
 ///   <c>as MultivariateEmpiricalOptions</c>, so an object of any other type is
 ///   forwarded as null.</param>
 ///
 public override void Fit(double[][] observations, int[] weights, IFittingOptions options)
 {
     MultivariateEmpiricalOptions empiricalOptions = options as MultivariateEmpiricalOptions;

     Fit(observations, weights, empiricalOptions);
 }
Beispiel #32
0
        /// <summary>
        ///   Re-estimates the probability table in place by accumulating the weight
        ///   of every observation into the cell it maps to, then normalizing.
        /// </summary>
        ///
        /// <param name="observations">The observations to fit the model against. Each
        ///   component is truncated to an integer and combined with <c>positions</c>
        ///   to compute the index of the table cell.</param>
        /// <param name="weights">The weight of each observation; must have the same
        ///   length as <paramref name="observations"/>.</param>
        /// <param name="options">Must be null; this method accepts no fitting options.</param>
        ///
        /// <exception cref="ArgumentException"><paramref name="options"/> is not null, or the
        ///   weight vector and the observations differ in length.</exception>
        ///
        public override void Fit(double[][] observations, double[] weights, IFittingOptions options)
        {
            if (options != null)
                throw new ArgumentException("This method does not accept fitting options.");

            if (observations.Length != weights.Length)
                throw new ArgumentException("The weight vector should have the same size as the observations", "weights");

            // Start from an empty table
            for (int cell = 0; cell < probabilities.Length; cell++)
                probabilities[cell] = 0;

            // Accumulate each sample's weight into its cell
            for (int s = 0; s < observations.Length; s++)
            {
                double[] sample = observations[s];

                int cell = 0;
                for (int d = 0; d < sample.Length; d++)
                    cell += (int)sample[d] * positions[d];

                probabilities[cell] += weights[s];
            }

            double total = 0;
            for (int cell = 0; cell < probabilities.Length; cell++)
                total += probabilities[cell];

            // Nothing to rescale when the table is empty or already sums to one
            if (total == 0 || total == 1)
                return;

            for (int cell = 0; cell < probabilities.Length; cell++)
                probabilities[cell] /= total;
        }
Beispiel #33
0
 /// <summary>
 ///   Creates a new <see cref="IndependentOptions"/> object holding
 ///   the given inner fitting options.
 /// </summary>
 ///
 /// <param name="innerOption">The fitting options to store in <c>InnerOption</c>,
 ///   intended for the inner component distributions of the independent
 ///   distributions.</param>
 ///
 public IndependentOptions(IFittingOptions innerOption)
 {
     this.InnerOption = innerOption;
 }
Beispiel #34
0
        /// <summary>
        ///   Fits the distribution's shape and spread from the mean and
        ///   variance of the squared observations.
        /// </summary>
        ///
        /// <param name="observations">The univariate observations to fit the model against.</param>
        /// <param name="weights">The weight of each observation, or null to use the
        ///   unweighted mean and variance.</param>
        /// <param name="options">Must be null; this method accepts no fitting options.</param>
        ///
        /// <remarks>
        ///   The shape is estimated as the squared mean of the squared observations
        ///   divided by their variance; the spread is their mean.
        /// </remarks>
        ///
        /// <exception cref="ArgumentException"><paramref name="options"/> is not null.</exception>
        ///
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            if (options != null)
                throw new ArgumentException("This method does not accept fitting options.");

            // Reference: R. Kolar, R. Jirik, J. Jan (2004) "Estimator Comparison of
            // the Nakagami-m Parameter and Its Application in Echocardiography",
            // Radioengineering, 13 (1), 8-12

            double[] squared = Matrix.ElementwisePower(observations, 2);

            double squaredMean;
            double squaredVariance;

            if (weights != null)
            {
                squaredMean = Statistics.Tools.WeightedMean(squared, weights);
                squaredVariance = Statistics.Tools.WeightedVariance(squared, weights);
            }
            else
            {
                squaredMean = Statistics.Tools.Mean(squared);
                squaredVariance = Statistics.Tools.Variance(squared);
            }

            double shapeEstimate = (squaredMean * squaredMean) / squaredVariance;
            double spreadEstimate = squaredMean;

            init(shapeEstimate, spreadEstimate);
        }
 /// <summary>
 ///   Not supported. This override always throws
 ///   <see cref="NotSupportedException"/>.
 /// </summary>
 ///
 /// <param name="observations">Not used.</param>
 /// <param name="weights">Not used.</param>
 /// <param name="options">Not used.</param>
 ///
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 ///
 public override void Fit(double[][,] observations, double[] weights, IFittingOptions options)
 {
     throw new NotSupportedException();
 }
 /// <summary>
 ///   Fits the underlying distribution to a given set of univariate
 ///   observations without sample weights.
 /// </summary>
 ///
 /// <param name="observations">
 ///   The array of observations to fit the model against.</param>
 /// <param name="options">
 ///   Optional arguments which may be used during fitting, such
 ///   as regularization constants and additional parameters.</param>
 ///
 /// <remarks>
 ///   This is a convenience overload: it forwards to the three-argument
 ///   Fit overload, passing null in place of the weight vector.
 /// </remarks>
 ///
 public virtual void Fit(double[] observations, IFittingOptions options)
 {
     // No weights supplied: delegate with a null weight vector
     this.Fit(observations, null, options);
 }
        /// <summary>
        ///   Fits the distribution to univariate observations by estimating
        ///   their (optionally weighted) mean.
        /// </summary>
        ///
        /// <param name="observations">The univariate observations to fit the model against.</param>
        /// <param name="weights">The weight of each observation, or null to use the
        ///   unweighted mean.</param>
        /// <param name="options">Not used by this method.</param>
        ///
        /// <remarks>
        ///   A null weight vector is accepted because the unweighted Fit overloads
        ///   forward null weights to this method; in that case the plain sample mean
        ///   is used instead of the weighted mean.
        /// </remarks>
        ///
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            double mean;

            if (weights == null)
            {
                // No weights given: every observation counts equally
                mean = Statistics.Tools.Mean(observations);
            }
            else
            {
                mean = Statistics.Tools.WeightedMean(observations, weights);
            }

            initialize(mean);
        }
        /// <summary>
        ///   Fits the underlying distribution to a given set of observations,
        ///   giving every observation the same weight.
        /// </summary>
        ///
        /// <param name="observations">
        ///   The array of observations to fit the model against.</param>
        /// <param name="options">
        ///   Optional arguments which may be used during fitting; forwarded unchanged.</param>
        ///
        /// <remarks>
        ///   A weight vector with one entry per observation is created, each entry
        ///   equal to one divided by the number of observations, and the call is
        ///   forwarded to the weighted <c>IDistribution.Fit</c> overload.
        /// </remarks>
        ///
        void IDistribution.Fit(Array observations, IFittingOptions options)
        {
            int count = observations.Length;

            // Uniform weights: each sample receives 1 / count
            double[] weights = new double[count];
            double uniform = 1.0 / count;

            for (int i = 0; i < weights.Length; i++)
                weights[i] = uniform;

            ((IDistribution)this).Fit(observations, weights, options);
        }
Beispiel #39
0
 /// <summary>
 ///   Fits the underlying distribution to a given set of weighted
 ///   univariate observations. This member is abstract: the fitting
 ///   procedure is supplied by each derived class.
 /// </summary>
 ///
 /// <param name="observations">
 ///   The array of observations to fit the model against. In this
 ///   overload each element is a single double value.
 /// </param>
 /// <param name="weights">
 ///   The weight vector containing the weight for each of the samples.</param>
 /// <param name="options">
 ///   Optional arguments which may be used during fitting, such
 ///   as regularization constants and additional parameters.</param>
 ///
 /// <remarks>
 ///   Although both double[] and double[][] arrays are supported,
 ///   providing a double[] for a multivariate distribution or a
 ///   double[][] for a univariate distribution may have a negative
 ///   impact in performance.
 /// </remarks>
 ///
 public abstract void Fit(double[] observations, double[] weights, IFittingOptions options);
        /// <summary>
        ///   Fits the underlying distribution to a given set of multivariate
        ///   observations, giving every observation the same weight.
        /// </summary>
        ///
        /// <param name="observations">
        ///   The observations to fit the model against, one <c>double[]</c>
        ///   vector per sample.</param>
        /// <param name="options">
        ///   Optional arguments which may be used during fitting, such
        ///   as regularization constants and additional parameters.</param>
        ///
        /// <remarks>
        ///   A weight vector with one entry per observation is created, each entry
        ///   equal to one divided by the number of observations, and the call is
        ///   forwarded to the three-argument Fit overload.
        /// </remarks>
        ///
        public virtual void Fit(double[][] observations, IFittingOptions options)
        {
            int count = observations.Length;

            // Uniform weights: each sample receives 1 / count
            double[] weights = new double[count];
            double uniform = 1.0 / count;

            for (int i = 0; i < weights.Length; i++)
                weights[i] = uniform;

            this.Fit(observations, weights, options);
        }
Beispiel #41
0
 /// <summary>
 ///   Fits the underlying distribution to a set of weighted observations,
 ///   forwarding to the overload that takes <c>TriangularOptions</c>.
 /// </summary>
 ///
 /// <param name="observations">The univariate observations to fit the model against.</param>
 /// <param name="weights">The weight vector containing the weight for each of the samples.</param>
 /// <param name="options">Optional fitting arguments. The value is converted with
 ///   <c>as TriangularOptions</c>, so an object of any other type is forwarded as null.</param>
 ///
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     TriangularOptions triangularOptions = options as TriangularOptions;

     Fit(observations, weights, triangularOptions);
 }
        /// <summary>
        ///   Fits the distribution's mean and lambda parameters to a set of
        ///   (optionally weighted) univariate observations.
        /// </summary>
        ///
        /// <param name="observations">The univariate observations to fit the model against.</param>
        /// <param name="weights">The weight of each observation, or null to weigh all
        ///   observations equally.</param>
        /// <param name="options">Must be null; this method accepts no fitting options.</param>
        ///
        /// <remarks>
        ///   The mean is estimated as the (weighted) sample mean. Lambda is estimated as
        ///   the total weight divided by the weighted sum of <c>1/x - 1/mean</c> over all
        ///   observations; without weights every observation has weight one, so the
        ///   total weight is the number of observations. Dividing by the total weight,
        ///   rather than by the number of observations, keeps the estimate independent
        ///   of how the weight vector is scaled.
        /// </remarks>
        ///
        /// <exception cref="ArgumentException"><paramref name="options"/> is not null.</exception>
        ///
        public override void Fit(double[] observations, double[] weights, IFittingOptions options)
        {
            if (options != null)
                throw new ArgumentException("This method does not accept fitting options.");

            double mean;
            double lambda;

            if (weights == null)
            {
                int n = observations.Length;
                mean = observations.Mean();

                double sum = 0;
                for (int i = 0; i < observations.Length; i++)
                    sum += (1.0 / observations[i] - 1.0 / mean);

                // lambda = n / sum(1/x - 1/mean)
                lambda = n / sum;
            }
            else
            {
                // The weights (not the observations themselves) define the mean
                mean = observations.WeightedMean(weights);

                double weightSum = 0;
                double sum = 0;
                for (int i = 0; i < observations.Length; i++)
                {
                    weightSum += weights[i];
                    sum += weights[i] * (1.0 / observations[i] - 1.0 / mean);
                }

                // lambda = sum(w) / sum(w * (1/x - 1/mean))
                lambda = weightSum / sum;
            }

            init(mean, lambda);
        }
Beispiel #43
0
 /// <summary>
 ///   Fits the underlying distribution to a given set of observations.
 ///   This override is not implemented: it always throws
 ///   <see cref="NotImplementedException"/>.
 /// </summary>
 /// 
 /// <param name="observations">The array of univariate observations to fit the model
 ///   against. Not used by this implementation.</param>
 /// <param name="weights">The weight vector containing the weight for each of the samples.
 ///   Not used by this implementation.</param>
 /// <param name="options">Optional arguments which may be used during fitting, such
 ///   as regularization constants and additional parameters. Not used by this
 ///   implementation.</param>
 ///   
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 /// 
 public override void Fit(double[] observations, double[] weights, IFittingOptions options)
 {
     throw new NotImplementedException();
 }