Example #1
        public void ForwardBackwardTest()
        {
            HiddenMarkovModel hmm = CreateModel1();

            //                     G  G  C  A
            int[] observations = { 2, 2, 1, 0 };

            double fwdLogLikelihood;

            double[,] fwd = ForwardBackwardAlgorithm.Forward(hmm, observations, out fwdLogLikelihood);

            double bwdLogLikelihood;

            double[,] bwd = ForwardBackwardAlgorithm.Backward(hmm, observations, out bwdLogLikelihood);

            Assert.AreEqual(fwdLogLikelihood, bwdLogLikelihood, 1e-10); // -5.5614629361549142
        }
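CreateModel1 is a fixture method defined elsewhere in the test class and not shown on this page. A minimal sketch of what such a factory could look like (the probabilities below are hypothetical placeholders, not the values behind the asserted likelihood):

        static HiddenMarkovModel CreateModel1()
        {
            double[] initial = { 0.6, 0.4 };                 // start-state probabilities
            double[,] transitions = { { 0.7, 0.3 },          // state-to-state probabilities
                                      { 0.4, 0.6 } };
            double[,] emissions = { { 0.25, 0.25, 0.50 },    // P(symbol | state) over A, C, G
                                    { 0.40, 0.40, 0.20 } };
            return new HiddenMarkovModel(transitions, emissions, initial);
        }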
Example #2
        public void ForwardScalingTest2()
        {
            HiddenMarkovModel hmm = CreateModel2();

            //                     A  B  B  A
            int[] observations = { 0, 1, 1, 0 };

            double[] scaling;
            double   logLikelihood;

            double[,] actual = ForwardBackwardAlgorithm.Forward(hmm, observations, out scaling, out logLikelihood);

            double p = System.Math.Exp(logLikelihood);

            Assert.AreEqual(0.054814695, p, 1e-8);
            Assert.IsFalse(double.IsNaN(p));
        }
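In the Accord.NET convention this example relies on, scaling holds the per-step linear-domain normalizers of the forward matrix, so the sequence log-likelihood is the sum of their logarithms. A quick sketch of that identity, in the same test style:

            double sumLogScaling = 0;
            for (int t = 0; t < scaling.Length; t++)
                sumLogScaling += System.Math.Log(scaling[t]);

            // should match the reported value up to floating-point error
            Assert.AreEqual(logLikelihood, sumLogScaling, 1e-10);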
Example #3
        public void LogForwardBackwardGenericTest()
        {
            var discreteModel = CreateModel1();
            var genericModel  = CreateModel4();

            int[]      discreteObservations = { 2, 2, 1, 0 };
            double[][] genericObservations  =
            {
                new double[] { 2 }, new double[] { 2 },
                new double[] { 1 }, new double[] { 0 }
            };

            double discreteFwdLogLikelihood;

            double[,] discreteFwd = ForwardBackwardAlgorithm.LogForward(discreteModel,
                                                                        discreteObservations, out discreteFwdLogLikelihood);

            double discreteBwdLogLikelihood;

            double[,] discreteBwd = ForwardBackwardAlgorithm.LogBackward(discreteModel,
                                                                         discreteObservations, out discreteBwdLogLikelihood);

            double genericFwdLogLikelihood;

            double[,] genericFwd = ForwardBackwardAlgorithm.LogForward(genericModel,
                                                                       genericObservations, out genericFwdLogLikelihood);

            double genericBwdLogLikelihood;

            double[,] genericBwd = ForwardBackwardAlgorithm.LogBackward(genericModel,
                                                                        genericObservations, out genericBwdLogLikelihood);

            Assert.AreEqual(discreteFwdLogLikelihood, discreteBwdLogLikelihood);
            Assert.AreEqual(genericFwdLogLikelihood, genericBwdLogLikelihood);
            Assert.AreEqual(discreteFwdLogLikelihood, genericBwdLogLikelihood);

            for (int i = 0; i < discreteFwd.GetLength(0); i++)
            {
                for (int j = 0; j < discreteFwd.GetLength(1); j++)
                {
                    Assert.AreEqual(discreteFwd[i, j], genericFwd[i, j]);
                    Assert.AreEqual(discreteBwd[i, j], genericBwd[i, j]);
                }
            }
        }
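CreateModel4 is another fixture that is not shown here; for the equality assertions to hold, it must encode the same distribution as CreateModel1 but through the generic HiddenMarkovModel<TDistribution> API. A hypothetical sketch (the actual fixture and its probabilities may differ):

        static HiddenMarkovModel<GeneralDiscreteDistribution> CreateModel4()
        {
            // Same hypothetical parameters as the CreateModel1 sketch above,
            // expressed as explicit emission distributions:
            double[] initial = { 0.6, 0.4 };
            double[,] transitions = { { 0.7, 0.3 }, { 0.4, 0.6 } };
            var emissions = new[]
            {
                new GeneralDiscreteDistribution(0.25, 0.25, 0.50),
                new GeneralDiscreteDistribution(0.40, 0.40, 0.20)
            };
            return new HiddenMarkovModel<GeneralDiscreteDistribution>(transitions, emissions, initial);
        }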
Example #4
        public void LogBackwardTest2()
        {
            HiddenMarkovModel hmm = Accord.Tests.Statistics.Models.Markov.
                                    ForwardBackwardAlgorithmTest.CreateModel3();

            int[] observations = { 0, 0, 1, 1 };

            double[,] expected = Matrix.Log(
                ForwardBackwardAlgorithm.Backward(hmm, observations));

            double[,] actual =
                ForwardBackwardAlgorithm.LogBackward(hmm, observations);

            Assert.IsTrue(expected.IsEqual(actual, 1e-10));

            foreach (double p in actual)
            {
                Assert.IsFalse(double.IsNaN(p));
            }
        }
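The log-domain variants exist for numerical range: linear-domain forward and backward probabilities underflow quickly as sequences grow, while their logarithms stay representable. A two-line illustration:

            double linear    = System.Math.Pow(0.1, 400);  // underflows to 0.0
            double logDomain = 400 * System.Math.Log(0.1); // about -921.03, still representable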
Example #5
        public void ForwardScalingTest()
        {
            HiddenMarkovModel hmm = CreateModel1();

            var P = Matrix.Exp(hmm.Probabilities);
            var A = Matrix.Exp(hmm.Transitions);
            var B = Matrix.Exp(hmm.Emissions);

            //                     G  G  C  A
            int[] observations = { 2, 2, 1, 0 };

            double[] scaling;
            double   logLikelihood;

            double[,] actual = ForwardBackwardAlgorithm.Forward(hmm, observations, out scaling, out logLikelihood);

            double a00 = P[0] * B[0, 2];
            double a01 = P[1] * B[1, 2];
            double t0  = a00 + a01;

            a00 /= t0;
            a01 /= t0;

            double a10 = (a00 * A[0, 0] + a01 * A[1, 0]) * B[0, 2];
            double a11 = (a01 * A[1, 1] + a00 * A[0, 1]) * B[1, 2];
            double t1  = a10 + a11;

            a10 /= t1;
            a11 /= t1;

            double a20 = (a10 * A[0, 0] + a11 * A[1, 0]) * B[0, 1];
            double a21 = (a11 * A[1, 1] + a10 * A[0, 1]) * B[1, 1];
            double t2  = a20 + a21;

            a20 /= t2;
            a21 /= t2;

            double a30 = (a20 * A[0, 0] + a21 * A[1, 0]) * B[0, 0];
            double a31 = (a21 * A[1, 1] + a20 * A[0, 1]) * B[1, 0];
            double t3  = a30 + a31;

            a30 /= t3;
            a31 /= t3;

            Assert.AreEqual(a00, actual[0, 0]);
            Assert.AreEqual(a01, actual[0, 1]);

            Assert.AreEqual(a10, actual[1, 0]);
            Assert.AreEqual(a11, actual[1, 1]);

            Assert.AreEqual(a20, actual[2, 0]);
            Assert.AreEqual(a21, actual[2, 1]);

            Assert.AreEqual(a30, actual[3, 0]);
            Assert.AreEqual(a31, actual[3, 1]);


            double p = System.Math.Exp(logLikelihood);

            Assert.AreEqual(0.00384315, p, 1e-8);
            Assert.IsFalse(double.IsNaN(p));
        }
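The hand computation above unrolls the scaled forward recursion for two states and four observations; t0..t3 are the per-step normalizers, and their product is the sequence likelihood. The same recursion in general form, as a minimal sketch in the linear domain (pi, A, B are the exponentiated model parameters as above):

        static double[,] ScaledForward(double[] pi, double[,] A, double[,] B,
            int[] observations, out double logLikelihood)
        {
            int T = observations.Length, N = pi.Length;
            var fwd = new double[T, N];
            logLikelihood = 0;

            for (int t = 0; t < T; t++)
            {
                double c = 0; // per-step normalizer (the t0..t3 of the test)
                for (int i = 0; i < N; i++)
                {
                    double s;
                    if (t == 0)
                        s = pi[i];
                    else
                    {
                        s = 0;
                        for (int j = 0; j < N; j++)
                            s += fwd[t - 1, j] * A[j, i];
                    }
                    fwd[t, i] = s * B[i, observations[t]];
                    c += fwd[t, i];
                }

                for (int i = 0; i < N; i++)
                    fwd[t, i] /= c;                  // rescale so each row sums to 1

                logLikelihood += System.Math.Log(c); // P(O) is the product of the normalizers
            }
            return fwd;
        }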
Example #6
        /// <summary>
        ///   Computes the gradient using the 
        ///   input/outputs stored in this object.
        /// </summary>
        /// 
        /// <returns>The value of the gradient vector for the given parameters.</returns>
        /// 
        protected double[] Gradient()
        {
            // Localize thread locals
            var logLikelihoods = this.logLikelihoods.Value;
            var inputs = this.inputs.Value;
            var outputs = this.outputs.Value;
            var lnZx = this.lnZx.Value;
            var lnZxy = this.lnZxy.Value;
            var gradient = this.gradient.Value;

            double error = 0;

            // The previous call to Objective could have computed
            // the log-likelihoods for all input values. However, if
            // this hasn't been the case, compute them now:

            if (logLikelihoods == null)
                model.LogLikelihood(inputs, outputs, out logLikelihoods);

            // Compute the partition function using the previously
            // computed likelihoods. Also compute the total error

            // For each x, compute lnZ(x) and lnZ(x,y)
            for (int i = 0; i < inputs.Length; i++)
            {
                double[] lli = logLikelihoods[i];

                // Compute the marginal likelihood
                double sum = Double.NegativeInfinity;
                for (int j = 0; j < lli.Length; j++)
                    sum = Special.LogSum(sum, lli[j]);

                lnZx[i] = sum;
                lnZxy[i] = lli[outputs[i]];

                // accumulate the negative
                // log-likelihood as the error function
                error -= lnZxy[i] - lnZx[i];
            }

            // Now start computing the gradient w.r.t to the
            // feature functions. Each feature function belongs
            // to a factor potential function, so:

            // For each clique potential (factor potential function)
#if DEBUG
            for (int c = 0; c < function.Factors.Length; c++)
#else
            Parallel.For(0, function.Factors.Length, c =>
#endif
            {
                FactorPotential<T> factor = function.Factors[c];

                int factorIndex = factor.Index;

                // Compute all forward and backward matrices to be
                // used in the feature functions' marginal computations.

                var lnFwds = new double[inputs.Length][,];
                var lnBwds = new double[inputs.Length][,];
                for (int i = 0; i < inputs.Length; i++)
                {
                    lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
                    lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
                }

                double[] marginals = new double[function.Outputs];

                // For each feature in the factor potential function
                int end = factor.ParameterIndex + factor.ParameterCount;
                for (int k = factor.ParameterIndex; k < end; k++)
                {
                    IFeature<T> feature = function.Features[k];
                    double parameter = function.Weights[k];

                    if (Double.IsInfinity(parameter))
                    {
                        gradient[k] = 0; continue;
                    }


                    // Compute the two marginal sums for the gradient calculation
                    // as given in eq. 1.52 of Sutton, McCallum; "An introduction to
                    // Conditional Random Fields for Relational Learning". The sums
                    // will be computed in the log domain for numerical stability.

                    double lnsum1 = Double.NegativeInfinity;
                    double lnsum2 = Double.NegativeInfinity;

                    // For each training sample (sequence)
                    for (int i = 0; i < inputs.Length; i++)
                    {
                        T[] x = inputs[i]; // training input
                        int y = outputs[i];  // training output

                        // Compute marginals for all possible outputs
                        for (int j = 0; j < marginals.Length; j++)
                            marginals[j] = Double.NegativeInfinity;

                        // However, making the assumption that each factor is responsible for only 
                        // one output label, we can compute the marginal only for the current factor
                        marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

                        // The first term contains a marginal probability p(w|x,y), which is
                        // exactly a marginal distribution of the clamped CRF (eq. 1.46).
                        lnsum1 = Special.LogSum(lnsum1, marginals[y] - lnZxy[i]);

                        // The second term contains a different marginal p(w,y|x), which is the
                        // same marginal probability required in a fully-observed CRF.
                        for (int j = 0; j < marginals.Length; j++)
                            lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);

#if DEBUG
                        if (Double.IsNaN(lnsum1) || Double.IsNaN(lnsum2))
                            throw new Exception();
#endif
                    }

                    // Compute the current derivative
                    double sum1 = Math.Exp(lnsum1);
                    double sum2 = Math.Exp(lnsum2);
                    double derivative = sum1 - sum2;

                    if (sum1 == sum2) derivative = 0;

#if DEBUG
                    if (Double.IsNaN(derivative))
                        throw new Exception();
#endif

                    // Include regularization derivative if required
                    if (sigma != 0) derivative -= parameter / sigma;

                    gradient[k] = -derivative;
                }
            }
#if !DEBUG
);
#endif

            // Reset log-likelihoods so they are recomputed in the next run,
            // either by the Objective function or by the Gradient calculation.

            this.logLikelihoods.Value = null;
            this.error.Value = error;

            return gradient; // return the gradient.
        }
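The two sums accumulated above implement eq. 1.52 of Sutton and McCallum, written here in the code's own symbols: lambda_k is function.Weights[k], sigma is the regularization weight, and the two expectations are exp(lnsum1) and exp(lnsum2):

$$\frac{\partial \ell}{\partial \lambda_k} = \sum_i \Big( \mathbb{E}_{p(w \mid x_i, y_i)}[f_k] - \mathbb{E}_{p(w, y \mid x_i)}[f_k] \Big) - \frac{\lambda_k}{\sigma}$$

gradient[k] stores the negated value, since the optimizer minimizes the negative conditional log-likelihood.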
Example #7
        private double[] gradient(T[][] observations, int[][] labels)
        {
            int N = observations.Length;

            var function = model.Function;
            var states   = model.States;

            double[] g = new double[function.Weights.Length];


            // Compute sequence probabilities
            var P = new double[N][,][];

            for (int i = 0; i < N; i++)
            {
                var Pi = P[i] = new double[states + 1, states][];

                T[]    x   = observations[i];
                var    fwd = ForwardBackwardAlgorithm.Forward(function.Factors[0], x, 0);
                var    bwd = ForwardBackwardAlgorithm.Backward(function.Factors[0], x, 0);
                double z   = partition(fwd, x);

                for (int prev = -1; prev < states; prev++)
                {
                    for (int next = 0; next < states; next++)
                    {
                        double[] Pis = new double[x.Length];
                        for (int t = 0; t < x.Length; t++)
                        {
                            Pis[t] = p(prev, next, x, t, fwd, bwd, function) / z;
                        }

                        Pi[prev + 1, next] = Pis;
                    }
                }
            }

            // Compute the gradient w.r.t. each feature
            //  function in the model's potential function.
            for (int k = 0; k < g.Length; k++)
            {
                var feature = function.Features[k];

                double sum1 = 0.0, sum2 = 0.0;
                for (int i = 0; i < N; i++)
                {
                    T[]   x  = observations[i];
                    int[] y  = labels[i];
                    var   Pi = P[i];

                    // Compute first term of the partial derivative
                    sum1 += feature.Compute(-1, y[0], x, 0);
                    for (int t = 1; t < x.Length; t++)
                    {
                        sum1 += feature.Compute(y[t - 1], y[t], x, t);
                    }

                    // Compute second term of the partial derivative
                    for (int prev = -1; prev < states; prev++)
                    {
                        for (int next = 0; next < states; next++)
                        {
                            double[] Pis = Pi[prev + 1, next];

                            for (int t = 0; t < Pis.Length; t++)
                            {
                                sum2 += feature.Compute(prev, next, x, t) * Pis[t];
                            }
                        }
                    }
                }

                g[k] = -(sum1 - sum2);
            }

            return(g);
        }
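partition and p are private helpers of the surrounding class and are not part of this snippet. A plausible sketch of partition, assuming fwd is the linear-domain forward matrix computed above (hypothetical, shown only to make the normalization by z readable):

        private double partition(double[,] fwd, T[] x)
        {
            // Z(x) is the sum of the forward variables at the last time step
            double z = 0;
            int last = x.Length - 1;
            for (int j = 0; j < fwd.GetLength(1); j++)
                z += fwd[last, j];
            return z;
        }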
Example #8
        /// <summary>
        ///   Computes the forward and backward probability matrices
        ///   for a given observation, referenced by its index in the
        ///   input training data.
        /// </summary>
        /// <param name="index">The index of the observation in the input training data.</param>
        /// <param name="fwd">Returns the computed forward probabilities matrix.</param>
        /// <param name="bwd">Returns the computed backward probabilities matrix.</param>
        /// <param name="scaling">Returns the scaling parameters used during calculations.</param>
        protected override void ComputeForwardBackward(int index, out double[,] fwd, out double[,] bwd,
                                                       out double[] scaling)
        {
            fwd = ForwardBackwardAlgorithm.Forward(model, continuousObservations[index], out scaling);
            bwd = ForwardBackwardAlgorithm.Backward(model, continuousObservations[index], scaling);
        }
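Because Backward is called with the scaling vector produced by Forward, both matrices share the same per-step normalization, so their elementwise product is proportional to the smoothed posterior p(state at t = i | observations). A sketch of the gamma computation a caller could build on top of this method:

        static double[,] Posteriors(double[,] fwd, double[,] bwd)
        {
            int T = fwd.GetLength(0), N = fwd.GetLength(1);
            var gamma = new double[T, N];
            for (int t = 0; t < T; t++)
            {
                double sum = 0;
                for (int i = 0; i < N; i++)
                    sum += gamma[t, i] = fwd[t, i] * bwd[t, i];
                for (int i = 0; i < N; i++)
                    gamma[t, i] /= sum; // normalize each time step to a distribution
            }
            return gamma;
        }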
Example #9
        private double[] gradient(T[][] observations, int[][] labels, double[] g)
        {
            var model    = Model;
            var function = model.Function;
            int states   = model.States;
            int n        = observations.Length;
            int d        = Model.Function.Weights.Length;
            int Tmax     = observations.Max(x => x.Length);
            int progress = 0;

            g.Clear();


            // Compute sequence probabilities
            Parallel.For(0, observations.Length, ParallelOptions,

                         () =>
            {
                // Create thread-local storage
                var work = new double[states + 1, states][];
                for (int j = 0; j < states + 1; j++)
                {
                    for (int k = 0; k < states; k++)
                    {
                        work[j, k] = new double[Tmax];
                    }
                }

                return(new
                {
                    bwd = new double[Tmax, states],
                    fwd = new double[Tmax, states],
                    sum1 = new double[d],
                    sum2 = new double[d],
                    work = work,
                    count = new int[] { 0 }
                });
            },

                         (i, state, local) =>
            {
                T[] x    = observations[i];
                var fwd  = local.fwd;
                var bwd  = local.bwd;
                var sum1 = local.sum1;
                var sum2 = local.sum2;
                var work = local.work;
                ForwardBackwardAlgorithm.Forward(function.Factors[0], x, fwd);
                ForwardBackwardAlgorithm.Backward(function.Factors[0], x, bwd);
                double z = partition(fwd, x);

                for (int prev = -1; prev < states; prev++)
                {
                    for (int next = 0; next < states; next++)
                    {
                        double[] Pis = work[prev + 1, next];
                        for (int t = 0; t < x.Length; t++)
                        {
                            Pis[t] = p(prev, next, x, t, fwd, bwd, function) / z;
                        }
                    }
                }

                // Compute the gradient w.r.t. each feature
                //  function in the model's potential function.

                int[] y = labels[i];

                Parallel.For(0, g.Length, ParallelOptions, k =>
                {
                    IFeature<T> feature = function.Features[k];

                    // Compute first term of the partial derivative
                    sum1[k] += feature.Compute(-1, y[0], x, 0);
                    for (int t = 1; t < x.Length; t++)
                    {
                        sum1[k] += feature.Compute(y[t - 1], y[t], x, t);
                    }

                    // Compute second term of the partial derivative
                    for (int prev = -1; prev < states; prev++)
                    {
                        for (int next = 0; next < states; next++)
                        {
                            double[] Pis = work[prev + 1, next];
                            for (int t = 0; t < Pis.Length; t++)
                            {
                                sum2[k] += feature.Compute(prev, next, x, t) * Pis[t];
                            }
                        }
                    }
                });

                local.count[0]++;
                return(local);
            },

                         (local) =>
            {
                lock (g)
                {
                    for (int k = 0; k < g.Length; k++)
                    {
                        g[k] -= (local.sum1[k] - local.sum2[k]);
                    }
                    progress += local.count[0];
                }
            }
                         );

            return(g);
        }
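The overload of Parallel.For used above takes a localInit factory, a body that threads per-thread state through each iteration, and a localFinally reducer that merges each thread's partial results under a lock. Stripped of the CRF details, the same accumulation pattern looks like this:

        double total = 0;
        object sync = new object();

        System.Threading.Tasks.Parallel.For(0, 1000000,
            () => 0.0,                                // localInit: fresh per-thread accumulator
            (i, loopState, local) => local + i,       // body: fold the iteration into the local value
            local => { lock (sync) total += local; }  // localFinally: merge once per thread
        );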
Example #10
        public double Run(int[][] observations_db, double[] weights)
        {
            ValidationHelper.ValidateObservationDb(observations_db, 0, mModel.SymbolCount);

            int K = observations_db.Length;

            mLogWeights = new double[K];
            if (weights != null)
            {
                for (int k = 0; k < K; ++k)
                {
                    mLogWeights[k] = System.Math.Log(weights[k]);
                }
            }

            int    N   = mModel.StateCount;
            double lnK = System.Math.Log(K);

            double[,] logA = mModel.LogTransitionMatrix;
            double[,] logB = mModel.LogEmissionMatrix;
            double[] logPi = mModel.LogProbabilityVector;

            int M = mModel.SymbolCount;

            mLogGamma = new double[K][,];
            mLogKsi   = new double[K][][,];

            for (int k = 0; k < K; ++k)
            {
                int T = observations_db[k].Length;
                mLogGamma[k] = new double[T, N];
                mLogKsi[k]   = new double[T][,];

                for (int t = 0; t < T; ++t)
                {
                    mLogKsi[k][t] = new double[N, N];
                }
            }

            int maxT = observations_db.Max(x => x.Length);

            double[,] lnfwd = new double[maxT, N];
            double[,] lnbwd = new double[maxT, N];

            // Initialize the model log-likelihoods
            double newLogLikelihood = Double.NegativeInfinity;
            double oldLogLikelihood = Double.NegativeInfinity;

            int    iteration          = 0;
            double deltaLogLikelihood = 0;
            bool   should_continue    = true;

            do // Until convergence or max iterations is reached
            {
                oldLogLikelihood = newLogLikelihood;

                for (int k = 0; k < K; ++k)
                {
                    int[] observations = observations_db[k];
                    double[,] logGamma = mLogGamma[k];
                    double[][,] logKsi = mLogKsi[k];
                    double w = mLogWeights[k];
                    int    T = observations.Length;

                    ForwardBackwardAlgorithm.LogForward(logA, logB, logPi, observations, lnfwd);
                    ForwardBackwardAlgorithm.LogBackward(logA, logB, logPi, observations, lnbwd);

                    // Compute Gamma values
                    for (int t = 0; t < T; ++t)
                    {
                        double lnsum = double.NegativeInfinity;
                        for (int i = 0; i < N; ++i)
                        {
                            logGamma[t, i] = lnfwd[t, i] + lnbwd[t, i] + w;
                            lnsum          = LogHelper.LogSum(lnsum, logGamma[t, i]);
                        }
                        if (lnsum != Double.NegativeInfinity)
                        {
                            for (int i = 0; i < N; ++i)
                            {
                                logGamma[t, i] = logGamma[t, i] - lnsum;
                            }
                        }
                    }

                    // Compute Ksi values
                    for (int t = 0; t < T - 1; ++t)
                    {
                        double lnsum = double.NegativeInfinity;
                        int    x     = observations[t + 1];

                        for (int i = 0; i < N; ++i)
                        {
                            for (int j = 0; j < N; ++j)
                            {
                                logKsi[t][i, j] = lnfwd[t, i] + logA[i, j] + lnbwd[t + 1, j] + logB[j, x] + w;
                                lnsum           = LogHelper.LogSum(lnsum, logKsi[t][i, j]);
                            }
                        }

                        for (int i = 0; i < N; ++i)
                        {
                            for (int j = 0; j < N; ++j)
                            {
                                logKsi[t][i, j] = logKsi[t][i, j] - lnsum;
                            }
                        }
                    }

                    newLogLikelihood = Double.NegativeInfinity;
                    for (int i = 0; i < N; ++i)
                    {
                        newLogLikelihood = LogHelper.LogSum(newLogLikelihood, lnfwd[T - 1, i]);
                    }
                }

                newLogLikelihood /= K;

                deltaLogLikelihood = newLogLikelihood - oldLogLikelihood;

                iteration++;

                //Console.WriteLine("Iteration: {0}", iteration);

                if (ShouldTerminate(deltaLogLikelihood, iteration))
                {
                    should_continue = false;
                }
                else
                {
                    // update pi
                    for (int i = 0; i < N; ++i)
                    {
                        double lnsum = double.NegativeInfinity;
                        for (int k = 0; k < K; ++k)
                        {
                            lnsum = LogHelper.LogSum(lnsum, mLogGamma[k][0, i]);
                        }
                        logPi[i] = lnsum - lnK;
                    }

                    // update A
                    for (int i = 0; i < N; ++i)
                    {
                        for (int j = 0; j < N; ++j)
                        {
                            double lndenom = double.NegativeInfinity;
                            double lnnum   = double.NegativeInfinity;

                            for (int k = 0; k < K; ++k)
                            {
                                int T = observations_db[k].Length;

                                for (int t = 0; t < T - 1; ++t)
                                {
                                    lnnum   = LogHelper.LogSum(lnnum, mLogKsi[k][t][i, j]);
                                    lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);
                                }
                            }

                            logA[i, j] = (lnnum == lndenom) ? 0 : lnnum - lndenom;
                        }
                    }

                    // update B
                    for (int i = 0; i < N; ++i)
                    {
                        for (int m = 0; m < M; ++m)
                        {
                            double lndenom = double.NegativeInfinity;
                            double lnnum   = double.NegativeInfinity;

                            for (int k = 0; k < K; ++k)
                            {
                                int[] observations = observations_db[k];
                                int   T            = observations.Length;

                                for (int t = 0; t < T; ++t)
                                {
                                    lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);

                                    if (observations[t] == m)
                                    {
                                        lnnum = LogHelper.LogSum(lnnum, mLogGamma[k][t, i]);
                                    }
                                }
                            }
                            logB[i, m] = lnnum - lndenom;
                        }
                    }
                }
            } while (should_continue);

            return(newLogLikelihood);
        }
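For reference, the quantities computed in each iteration above are the standard Baum-Welch re-estimation formulas, evaluated in the log domain (with each sequence's contribution additionally shifted by its log-weight w):

$$\gamma_t(i) \propto \alpha_t(i)\,\beta_t(i), \qquad \xi_t(i,j) \propto \alpha_t(i)\,a_{ij}\,b_j(o_{t+1})\,\beta_{t+1}(j)$$

$$\pi_i = \frac{1}{K}\sum_k \gamma_1^{(k)}(i), \qquad a_{ij} = \frac{\sum_k \sum_{t<T_k} \xi_t^{(k)}(i,j)}{\sum_k \sum_{t<T_k} \gamma_t^{(k)}(i)}, \qquad b_i(m) = \frac{\sum_k \sum_{t:\,o_t^{(k)}=m} \gamma_t^{(k)}(i)}{\sum_k \sum_t \gamma_t^{(k)}(i)}$$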
Example #11
        /// <summary>
        ///   Runs the learning algorithm for univariate (one-dimensional) observations.
        /// </summary>
        /// <param name="observations_db">The database of observation sequences.</param>
        /// <param name="weights">The weight of each sequence, or null for uniform weights.</param>
        /// <returns>The average log-likelihood of the model after training.</returns>
        public double Run(double[][] observations_db, double[] weights)
        {
            DiagnosticsHelper.Assert(mModel.Dimension == 1);

            int K = observations_db.Length;

            mLogWeights = new double[K];
            if (weights != null)
            {
                for (int k = 0; k < K; ++k)
                {
                    mLogWeights[k] = System.Math.Log(weights[k]);
                }
            }

            double[] observations_db_1d = MathHelper.Concatenate<double>(observations_db);
            double[] Bweights           = new double[observations_db_1d.Length];

            int    N   = mModel.StateCount;
            double lnK = System.Math.Log(K);

            double[,] logA = mModel.LogTransitionMatrix;
            DistributionModel[] probB = mModel.EmissionModels;
            double[]            logPi = mModel.LogProbabilityVector;

            int M = mModel.SymbolCount;

            mLogGamma = new double[K][,];
            mLogKsi   = new double[K][][,];

            for (int k = 0; k < K; ++k)
            {
                int T = observations_db[k].Length;
                mLogGamma[k] = new double[T, N];
                mLogKsi[k]   = new double[T][,];

                for (int t = 0; t < T; ++t)
                {
                    mLogKsi[k][t] = new double[N, N];
                }
            }

            int maxT = observations_db.Max(x => x.Length);

            double[,] lnfwd = new double[maxT, N];
            double[,] lnbwd = new double[maxT, N];

            // Initialize the model log-likelihoods
            double newLogLikelihood = Double.NegativeInfinity;
            double oldLogLikelihood = Double.NegativeInfinity;

            int    iteration          = 0;
            double deltaLogLikelihood = 0;
            bool   should_continue    = true;

            do // Until convergence or max iterations is reached
            {
                oldLogLikelihood = newLogLikelihood;

                for (int k = 0; k < K; ++k)
                {
                    double[] observations = observations_db[k];
                    double[,] logGamma = mLogGamma[k];
                    double[][,] logKsi = mLogKsi[k];
                    double w = mLogWeights[k];
                    int    T = observations.Length;

                    ForwardBackwardAlgorithm.LogForward(logA, probB, logPi, observations, lnfwd);
                    ForwardBackwardAlgorithm.LogBackward(logA, probB, logPi, observations, lnbwd);

                    // Compute Gamma values
                    for (int t = 0; t < T; ++t)
                    {
                        double lnsum = double.NegativeInfinity;
                        for (int i = 0; i < N; ++i)
                        {
                            logGamma[t, i] = lnfwd[t, i] + lnbwd[t, i] + w;
                            lnsum          = LogHelper.LogSum(lnsum, logGamma[t, i]);
                        }
                        if (lnsum != Double.NegativeInfinity)
                        {
                            for (int i = 0; i < N; ++i)
                            {
                                logGamma[t, i] = logGamma[t, i] - lnsum;
                            }
                        }
                    }

                    // Compute Ksi values
                    for (int t = 0; t < T - 1; ++t)
                    {
                        double lnsum = double.NegativeInfinity;
                        double x     = observations[t + 1];

                        for (int i = 0; i < N; ++i)
                        {
                            for (int j = 0; j < N; ++j)
                            {
                                logKsi[t][i, j] = lnfwd[t, i] + logA[i, j] + lnbwd[t + 1, j] + MathHelper.LogProbabilityFunction(probB[j], x) + w;
                                lnsum           = LogHelper.LogSum(lnsum, logKsi[t][i, j]);
                            }
                        }

                        if (lnsum != double.NegativeInfinity)
                        {
                            for (int i = 0; i < N; ++i)
                            {
                                for (int j = 0; j < N; ++j)
                                {
                                    logKsi[t][i, j] = logKsi[t][i, j] - lnsum;
                                }
                            }
                        }
                    }

                    newLogLikelihood = Double.NegativeInfinity;
                    for (int i = 0; i < N; ++i)
                    {
                        newLogLikelihood = LogHelper.LogSum(newLogLikelihood, lnfwd[T - 1, i]);
                    }
                }

                newLogLikelihood /= K;

                deltaLogLikelihood = newLogLikelihood - oldLogLikelihood;

                iteration++;

                if (ShouldTerminate(deltaLogLikelihood, iteration))
                {
                    should_continue = false;
                }
                else
                {
                    // update pi
                    for (int i = 0; i < N; ++i)
                    {
                        double lnsum = double.NegativeInfinity;
                        for (int k = 0; k < K; ++k)
                        {
                            lnsum = LogHelper.LogSum(lnsum, mLogGamma[k][0, i]);
                        }
                        logPi[i] = lnsum - lnK;
                    }

                    // update A
                    for (int i = 0; i < N; ++i)
                    {
                        for (int j = 0; j < N; ++j)
                        {
                            double lndenom = double.NegativeInfinity;
                            double lnnum   = double.NegativeInfinity;

                            for (int k = 0; k < K; ++k)
                            {
                                int T = observations_db[k].Length;

                                for (int t = 0; t < T - 1; ++t)
                                {
                                    lnnum   = LogHelper.LogSum(lnnum, mLogKsi[k][t][i, j]);
                                    lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);
                                }
                            }

                            logA[i, j] = (lnnum == lndenom) ? 0 : lnnum - lndenom;
                        }
                    }


                    // update B
                    for (int i = 0; i < N; ++i)
                    {
                        double lnsum = double.NegativeInfinity;

                        for (int k = 0, w = 0; k < K; ++k)
                        {
                            double[] observations = observations_db[k];
                            int      T            = observations.Length;

                            for (int t = 0; t < T; ++t, ++w)
                            {
                                Bweights[w] = mLogGamma[k][t, i];
                                lnsum       = LogHelper.LogSum(lnsum, Bweights[w]);
                            }
                        }

                        if (lnsum != double.NegativeInfinity)
                        {
                            for (int w = 0; w < Bweights.Length; ++w)
                            {
                                Bweights[w] = Bweights[w] - lnsum;
                            }
                        }

                        for (int w = 0; w < Bweights.Length; ++w)
                        {
                            Bweights[w] = System.Math.Exp(Bweights[w]);
                        }

                        probB[i].Process(observations_db_1d, Bweights);
                    }
                }
            } while (should_continue);

            return(newLogLikelihood);
        }
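The structural difference from the discrete version above is the B update: instead of tallying symbol counts, each emission model is re-fitted against the concatenated observations with exp(gamma) responsibilities as weights. If probB[i] were, say, a univariate Gaussian, its Process call would presumably reduce to the weighted maximum-likelihood estimates

$$\mu_i = \frac{\sum_t w_t\,o_t}{\sum_t w_t}, \qquad \sigma_i^2 = \frac{\sum_t w_t\,(o_t - \mu_i)^2}{\sum_t w_t}$$

where the w_t are the normalized Bweights computed above.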
Example #12
        private void InnerGradient(FactorPotential<T> factor, T[][] inputs, int[] outputs, double[] lnZx, double[] lnZxy, double[] gradient)
        {
            int factorIndex = factor.Index;

            // Compute all forward and backward matrices to be
            //  used in the feature functions marginal computations.

            double[][,] lnFwds = new double[inputs.Length][,];
            double[][,] lnBwds = new double[inputs.Length][,];
            for (int i = 0; i < inputs.Length; i++)
            {
                lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
                lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
            }

            double[] marginals = new double[function.Outputs];

            // For each feature in the factor potential function
            int end = factor.FactorParameters.Offset + factor.FactorParameters.Count;

            for (int k = factor.FactorParameters.Offset; k < end; k++)
            {
                IFeature<T>  feature   = function.Features[k];
                double       parameter = function.Weights[k];

                if (Double.IsInfinity(parameter))
                {
                    gradient[k] = 0; continue;
                }


                // Compute the two marginal sums for the gradient calculation
                // as given in eq. 1.52 of Sutton, McCallum; "An introduction to
                // Conditional Random Fields for Relational Learning". The sums
                // will be computed in the log domain for numerical stability.

                double lnsum1 = Double.NegativeInfinity;
                double lnsum2 = Double.NegativeInfinity;

                // For each training sample (sequence)
                for (int i = 0; i < inputs.Length; i++)
                {
                    T[] x = inputs[i];  // training input
                    int y = outputs[i]; // training output

                    // Compute marginals for all possible outputs
                    for (int j = 0; j < marginals.Length; j++)
                    {
                        marginals[j] = Double.NegativeInfinity;
                    }

                    // However, making the assumption that each factor is responsible for only
                    // one output label, we can compute the marginal only for the current factor
                    marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

                    // The first term contains a marginal probability p(w|x,y), which is
                    // exactly a marginal distribution of the clamped CRF (eq. 1.46).
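                    // (the equality guard below yields log(1) = 0 when the two terms are
                    // equal, in particular when both are -Infinity, avoiding the NaN that
                    // (-Inf) - (-Inf) would otherwise produce)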
                    lnsum1 = Special.LogSum(lnsum1, (marginals[y] == lnZxy[i]) ? 0 : marginals[y] - lnZxy[i]);

                    // The second term contains a different marginal p(w,y|x), which is the
                    // same marginal probability required in a fully-observed CRF.
                    for (int j = 0; j < marginals.Length; j++)
                    {
                        lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);
                    }

                    Accord.Diagnostics.Debug.Assert(!marginals.HasNaN());
                    Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum1));
                    Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum2));
                }

                // Compute the current derivative
                double sum1       = Math.Exp(lnsum1);
                double sum2       = Math.Exp(lnsum2);
                double derivative = sum1 - sum2;

                if (sum1 == sum2)
                {
                    derivative = 0;
                }

                Accord.Diagnostics.Debug.Assert(!Double.IsNaN(derivative));

                // Include regularization derivative if required
                if (sigma != 0)
                {
                    derivative -= parameter / sigma;
                }

                gradient[k] = -derivative;
            }
        }