/// <summary>
///   Checks that the forward and the backward passes report the same
///   log-likelihood for one observation sequence under the model
///   returned by <c>CreateModel1</c>.
/// </summary>
public void ForwardBackwardTest()
{
    HiddenMarkovModel model = CreateModel1();

    // G G C A
    int[] sequence = { 2, 2, 1, 0 };

    double forwardLogLikelihood;
    double backwardLogLikelihood;

    // Only the reported likelihoods are compared; the returned
    // probability matrices are not inspected by this test.
    ForwardBackwardAlgorithm.Forward(model, sequence, out forwardLogLikelihood);
    ForwardBackwardAlgorithm.Backward(model, sequence, out backwardLogLikelihood);

    // Reference value noted alongside the original assertion: -5.5614629361549142
    Assert.AreEqual(forwardLogLikelihood, backwardLogLikelihood, 1e-10);
}
/// <summary>
///   Runs the scaled forward pass on the model returned by
///   <c>CreateModel2</c> and checks the resulting sequence likelihood.
/// </summary>
public void ForwardScalingTest2()
{
    HiddenMarkovModel model = CreateModel2();

    // A B B A
    int[] sequence = { 0, 1, 1, 0 };

    double[] scaling;
    double logLikelihood;

    // The forward matrix and the scaling vector are not checked here.
    ForwardBackwardAlgorithm.Forward(model, sequence, out scaling, out logLikelihood);

    // Convert back from the log domain before comparing
    double likelihood = System.Math.Exp(logLikelihood);

    Assert.AreEqual(0.054814695, likelihood, 1e-8);
    Assert.IsFalse(double.IsNaN(likelihood));
}
/// <summary>
///   Compares the log-domain forward and backward passes of the model
///   from <c>CreateModel1</c> (integer observations) against the model
///   from <c>CreateModel4</c> (vector observations) on the same sequence.
///   Both the likelihoods and every matrix entry must match exactly.
/// </summary>
public void LogForwardBackwardGenericTest()
{
    var discrete = CreateModel1();
    var generic = CreateModel4();

    // The same sequence, once as symbols and once as 1-element vectors
    int[] symbols = { 2, 2, 1, 0 };
    double[][] vectors =
    {
        new double[] { 2 },
        new double[] { 2 },
        new double[] { 1 },
        new double[] { 0 }
    };

    double discreteForwardLL;
    double[,] discreteForward = ForwardBackwardAlgorithm.LogForward(discrete, symbols, out discreteForwardLL);

    double discreteBackwardLL;
    double[,] discreteBackward = ForwardBackwardAlgorithm.LogBackward(discrete, symbols, out discreteBackwardLL);

    double genericForwardLL;
    double[,] genericForward = ForwardBackwardAlgorithm.LogForward(generic, vectors, out genericForwardLL);

    double genericBackwardLL;
    double[,] genericBackward = ForwardBackwardAlgorithm.LogBackward(generic, vectors, out genericBackwardLL);

    // Likelihoods: forward vs backward within each model, then across models
    Assert.AreEqual(discreteForwardLL, discreteBackwardLL);
    Assert.AreEqual(genericForwardLL, genericBackwardLL);
    Assert.AreEqual(discreteForwardLL, genericBackwardLL);

    // Matrices: element-wise, using the discrete forward matrix for the bounds
    int rows = discreteForward.GetLength(0);
    int cols = discreteForward.GetLength(1);

    for (int t = 0; t < rows; t++)
    {
        for (int s = 0; s < cols; s++)
        {
            Assert.AreEqual(discreteForward[t, s], genericForward[t, s]);
            Assert.AreEqual(discreteBackward[t, s], genericBackward[t, s]);
        }
    }
}
/// <summary>
///   Checks that <c>LogBackward</c> equals the element-wise logarithm of
///   <c>Backward</c> (within 1e-10) and that it contains no NaN entries.
/// </summary>
public void LogBackwardTest2()
{
    HiddenMarkovModel model =
        Accord.Tests.Statistics.Models.Markov.ForwardBackwardAlgorithmTest.CreateModel3();

    int[] sequence = { 0, 0, 1, 1 };

    // Reference: plain backward matrix moved into the log domain
    double[,] backward = ForwardBackwardAlgorithm.Backward(model, sequence);
    double[,] expected = Matrix.Log(backward);

    double[,] actual = ForwardBackwardAlgorithm.LogBackward(model, sequence);

    bool matches = expected.IsEqual(actual, 1e-10);
    Assert.IsTrue(matches);

    // Visit every entry in row-major order
    int rows = actual.GetLength(0);
    int cols = actual.GetLength(1);

    for (int t = 0; t < rows; t++)
    {
        for (int s = 0; s < cols; s++)
        {
            Assert.IsFalse(double.IsNaN(actual[t, s]));
        }
    }
}
/// <summary>
///   Recomputes the scaled forward recursion by hand for a two-state
///   model and a four-symbol sequence, and checks that the library
///   result matches it entry by entry. Also checks the likelihood.
/// </summary>
public void ForwardScalingTest()
{
    HiddenMarkovModel model = CreateModel1();

    // Model parameters taken out of the log domain
    var initial = Matrix.Exp(model.Probabilities);
    var transitions = Matrix.Exp(model.Transitions);
    var emissions = Matrix.Exp(model.Emissions);

    // G G C A
    int[] sequence = { 2, 2, 1, 0 };

    double[] scaling;
    double logLikelihood;
    double[,] actual = ForwardBackwardAlgorithm.Forward(model, sequence, out scaling, out logLikelihood);

    // Hand-computed forward values; each time step is normalised by
    // the sum of its two state values before it feeds the next step.
    var expected = new double[sequence.Length, 2];

    for (int t = 0; t < sequence.Length; t++)
    {
        int symbol = sequence[t];
        double s0, s1;

        if (t == 0)
        {
            // Initial step: prior times emission
            s0 = initial[0] * emissions[0, symbol];
            s1 = initial[1] * emissions[1, symbol];
        }
        else
        {
            double p0 = expected[t - 1, 0];
            double p1 = expected[t - 1, 1];

            // The operand order below is kept as written on purpose:
            // the assertions compare for exact floating-point equality.
            s0 = (p0 * transitions[0, 0] + p1 * transitions[1, 0]) * emissions[0, symbol];
            s1 = (p1 * transitions[1, 1] + p0 * transitions[0, 1]) * emissions[1, symbol];
        }

        double total = s0 + s1;
        expected[t, 0] = s0 / total;
        expected[t, 1] = s1 / total;
    }

    for (int t = 0; t < sequence.Length; t++)
    {
        Assert.AreEqual(expected[t, 0], actual[t, 0]);
        Assert.AreEqual(expected[t, 1], actual[t, 1]);
    }

    double likelihood = System.Math.Exp(logLikelihood);

    Assert.AreEqual(0.00384315, likelihood, 1e-8);
    Assert.IsFalse(double.IsNaN(likelihood));
}
/// <summary>
///   Computes the gradient using the
///   input/outputs stored in this object.
/// </summary>
///
/// <returns>The value of the gradient vector for the given parameters.</returns>
///
protected double[] Gradient()
{
    // Pull the per-thread working state into locals
    var logLikelihoods = this.logLikelihoods.Value;
    var inputs = this.inputs.Value;
    var outputs = this.outputs.Value;
    var lnZx = this.lnZx.Value;
    var lnZxy = this.lnZxy.Value;
    var gradient = this.gradient.Value;

    double error = 0;

    // A previous call to Objective may already have produced the
    // log-likelihoods for all inputs; if not, compute them now.
    if (logLikelihoods == null)
    {
        model.LogLikelihood(inputs, outputs, out logLikelihoods);
    }

    // For each sample x, compute lnZ(x) (log-sum over all outputs) and
    // lnZ(x,y) (entry of the training output), and accumulate the
    // negative log-likelihood as the error value.
    for (int i = 0; i < inputs.Length; i++)
    {
        double[] sampleLikelihoods = logLikelihoods[i];

        double marginal = Double.NegativeInfinity;
        for (int j = 0; j < sampleLikelihoods.Length; j++)
        {
            marginal = Special.LogSum(marginal, sampleLikelihoods[j]);
        }

        lnZx[i] = marginal;
        lnZxy[i] = sampleLikelihoods[outputs[i]];

        error -= lnZxy[i] - lnZx[i];
    }

    // Gradient w.r.t. the feature functions. Every feature belongs to
    // one factor potential, so the work is organised per factor. Each
    // factor writes only to gradient[k] for k in its own parameter range.
    Action<int> accumulateFactor = factorSlot =>
    {
        FactorPotential<T> factor = function.Factors[factorSlot];
        int factorIndex = factor.Index;

        // Forward and backward matrices for every input, reused by
        // all feature marginal computations of this factor.
        var lnFwds = new double[inputs.Length][,];
        var lnBwds = new double[inputs.Length][,];

        for (int i = 0; i < inputs.Length; i++)
        {
            lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
            lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
        }

        double[] marginals = new double[function.Outputs];

        int first = factor.ParameterIndex;
        int end = first + factor.ParameterCount;

        // For each feature in the factor potential function
        for (int k = first; k < end; k++)
        {
            IFeature<T> feature = function.Features[k];
            double parameter = function.Weights[k];

            // Infinite weights get a zero gradient entry
            if (Double.IsInfinity(parameter))
            {
                gradient[k] = 0;
                continue;
            }

            // The two marginal sums of the gradient, as given in eq. 1.52 of
            // Sutton, McCallum; "An introduction to Conditional Random Fields
            // for Relational Learning". Both are accumulated in the log domain.
            double lnsum1 = Double.NegativeInfinity;
            double lnsum2 = Double.NegativeInfinity;

            // For each training sample (sequence)
            for (int i = 0; i < inputs.Length; i++)
            {
                T[] x = inputs[i];   // training input
                int y = outputs[i];  // training output

                // Start with all outputs at log(0) ...
                for (int j = 0; j < marginals.Length; j++)
                {
                    marginals[j] = Double.NegativeInfinity;
                }

                // ... and, assuming each factor is responsible for a single
                // output label, fill in only the entry of the current factor.
                marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

                // First term: marginal p(w|x,y) of the clamped CRF (eq. 1.46)
                lnsum1 = Special.LogSum(lnsum1, marginals[y] - lnZxy[i]);

                // Second term: marginal p(w,y|x) of the fully-observed CRF
                for (int j = 0; j < marginals.Length; j++)
                {
                    lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);
                }

#if DEBUG
                if (Double.IsNaN(lnsum1) || Double.IsNaN(lnsum2))
                    throw new Exception();
#endif
            }

            // Leave the log domain and take the difference; equal terms
            // are forced to exactly zero.
            double sum1 = Math.Exp(lnsum1);
            double sum2 = Math.Exp(lnsum2);
            double derivative = (sum1 == sum2) ? 0 : sum1 - sum2;

#if DEBUG
            if (Double.IsNaN(derivative))
                throw new Exception();
#endif

            // Include the regularization derivative if required
            if (sigma != 0)
            {
                derivative -= parameter / sigma;
            }

            gradient[k] = -derivative;
        }
    };

    // Debug builds walk the factors sequentially; other builds run them in parallel.
#if DEBUG
    for (int c = 0; c < function.Factors.Length; c++)
    {
        accumulateFactor(c);
    }
#else
    Parallel.For(0, function.Factors.Length, accumulateFactor);
#endif

    // Reset the log-likelihoods so they are recomputed in the next run,
    // either by the Objective function or by this gradient calculation.
    this.logLikelihoods.Value = null;
    this.error.Value = error;

    return gradient;
}
/// <summary>
///   Computes the gradient of the objective with respect to every
///   weight of the model's potential function, sequentially.
/// </summary>
/// <param name="observations">The training sequences.</param>
/// <param name="labels">The label sequence for each training sequence.</param>
/// <returns>A new vector with one entry per weight, holding <c>-(sum1 - sum2)</c>.</returns>
private double[] gradient(T[][] observations, int[][] labels)
{
    int sampleCount = observations.Length;
    var function = model.Function;
    var states = model.States;

    double[] result = new double[function.Weights.Length];

    // Step 1: per sequence, the values p(prev, next, t) / z for every
    // (previous state, next state) pair. Row 0 stands for prev = -1.
    var probabilities = new double[sampleCount][,][];

    for (int i = 0; i < sampleCount; i++)
    {
        T[] x = observations[i];
        var table = new double[states + 1, states][];
        probabilities[i] = table;

        var fwd = ForwardBackwardAlgorithm.Forward(function.Factors[0], x, 0);
        var bwd = ForwardBackwardAlgorithm.Backward(function.Factors[0], x, 0);

        double z = partition(fwd, x);

        for (int row = 0; row <= states; row++)
        {
            int prev = row - 1;

            for (int next = 0; next < states; next++)
            {
                double[] overTime = new double[x.Length];

                for (int t = 0; t < overTime.Length; t++)
                {
                    overTime[t] = p(prev, next, x, t, fwd, bwd, function) / z;
                }

                table[row, next] = overTime;
            }
        }
    }

    // Step 2: the partial derivative for each feature function.
    for (int k = 0; k < result.Length; k++)
    {
        var feature = function.Features[k];

        double observed = 0.0;  // first term: feature along the labelled path
        double expected = 0.0;  // second term: feature weighted by the table above

        for (int i = 0; i < sampleCount; i++)
        {
            T[] x = observations[i];
            int[] y = labels[i];
            var table = probabilities[i];

            // First term of the partial derivative
            observed += feature.Compute(-1, y[0], x, 0);
            for (int t = 1; t < x.Length; t++)
            {
                observed += feature.Compute(y[t - 1], y[t], x, t);
            }

            // Second term of the partial derivative
            for (int row = 0; row <= states; row++)
            {
                int prev = row - 1;

                for (int next = 0; next < states; next++)
                {
                    double[] overTime = table[row, next];

                    for (int t = 0; t < overTime.Length; t++)
                    {
                        expected += feature.Compute(prev, next, x, t) * overTime[t];
                    }
                }
            }
        }

        result[k] = -(observed - expected);
    }

    return result;
}
/// <summary>
///   Computes the forward and backward probabilities matrices
///   for a given observation referenced by its index in the
///   input training data.
/// </summary>
/// <param name="index">The index of the observation in the input training data.</param>
/// <param name="fwd">Returns the computed forward probabilities matrix.</param>
/// <param name="bwd">Returns the computed backward probabilities matrix.</param>
/// <param name="scaling">Returns the scaling parameters used during calculations.</param>
protected override void ComputeForwardBackward(int index,
    out double[,] fwd, out double[,] bwd, out double[] scaling)
{
    var sequence = continuousObservations[index];

    // The forward pass produces the scaling vector,
    // which the backward pass then takes as input.
    fwd = ForwardBackwardAlgorithm.Forward(model, sequence, out scaling);
    bwd = ForwardBackwardAlgorithm.Backward(model, sequence, scaling);
}
/// <summary>
///   Computes the gradient of the objective with respect to every
///   weight of the model's potential function, processing the
///   training sequences in parallel.
/// </summary>
/// <param name="observations">The training sequences.</param>
/// <param name="labels">The label sequence for each training sequence.</param>
/// <param name="g">
///   The vector that receives the gradient. It is cleared first and
///   then filled with <c>-(sum1 - sum2)</c> accumulated over all sequences.
/// </param>
/// <returns>The same array instance passed in as <paramref name="g"/>.</returns>
private double[] gradient(T[][] observations, int[][] labels, double[] g)
{
    var model = Model;
    var function = model.Function;
    int states = model.States;
    int d = function.Weights.Length;

    g.Clear();

    // Nothing to accumulate: the gradient stays at zero. This also
    // avoids calling Max() on an empty collection, which would throw.
    if (observations.Length == 0)
    {
        return g;
    }

    // All per-thread buffers are sized for the longest sequence, so a
    // shorter sequence only uses a prefix of each buffer.
    int Tmax = observations.Max(seq => seq.Length);

    Parallel.For(0, observations.Length, ParallelOptions,

        // Create the thread-local storage
        () =>
        {
            var work = new double[states + 1, states][];
            for (int j = 0; j < states + 1; j++)
            {
                for (int k = 0; k < states; k++)
                {
                    work[j, k] = new double[Tmax];
                }
            }

            return new
            {
                bwd = new double[Tmax, states],
                fwd = new double[Tmax, states],
                sum1 = new double[d],
                sum2 = new double[d],
                work = work
            };
        },

        // Process one training sequence
        (i, state, local) =>
        {
            T[] x = observations[i];
            int[] y = labels[i];

            var fwd = local.fwd;
            var bwd = local.bwd;
            var sum1 = local.sum1;
            var sum2 = local.sum2;
            var work = local.work;

            // Compute sequence probabilities
            ForwardBackwardAlgorithm.Forward(function.Factors[0], x, fwd);
            ForwardBackwardAlgorithm.Backward(function.Factors[0], x, bwd);

            double z = partition(fwd, x);

            for (int prev = -1; prev < states; prev++)
            {
                for (int next = 0; next < states; next++)
                {
                    double[] Pis = work[prev + 1, next];

                    for (int t = 0; t < x.Length; t++)
                    {
                        Pis[t] = p(prev, next, x, t, fwd, bwd, function) / z;
                    }
                }
            }

            // Compute the gradient w.r.t. each feature function in the
            // model's potential function. Each k touches only its own
            // slot of sum1/sum2, so the inner loop can run in parallel.
            Parallel.For(0, g.Length, ParallelOptions, k =>
            {
                IFeature<T> feature = function.Features[k];

                // First term of the partial derivative
                sum1[k] += feature.Compute(-1, y[0], x, 0);
                for (int t = 1; t < x.Length; t++)
                {
                    sum1[k] += feature.Compute(y[t - 1], y[t], x, t);
                }

                // Second term of the partial derivative
                for (int prev = -1; prev < states; prev++)
                {
                    for (int next = 0; next < states; next++)
                    {
                        double[] Pis = work[prev + 1, next];

                        // Bound by x.Length, not Pis.Length: the buffer holds
                        // Tmax entries, and everything past x.Length is left
                        // over from an earlier, longer sequence handled by
                        // this thread (and t would be out of range for x).
                        for (int t = 0; t < x.Length; t++)
                        {
                            sum2[k] += feature.Compute(prev, next, x, t) * Pis[t];
                        }
                    }
                }
            });

            return local;
        },

        // Merge the partial sums of one thread into the shared result
        (local) =>
        {
            lock (g)
            {
                for (int k = 0; k < g.Length; k++)
                {
                    g[k] -= (local.sum1[k] - local.sum2[k]);
                }
            }
        }
    );

    return g;
}
/// <summary>
///   Runs Baum-Welch re-estimation on a set of discrete observation
///   sequences until <c>ShouldTerminate</c> reports that the loop should stop.
/// </summary>
/// <param name="observations_db">The observation sequences (symbol indices).</param>
/// <param name="weights">
///   Optional per-sequence weights; their logarithm is added to the
///   unnormalised gamma and ksi values. When null, a log-weight of 0 is used.
/// </param>
/// <returns>
///   The mean, over all sequences, of the per-sequence log-likelihood
///   computed in the last iteration.
/// </returns>
public double Run(int[][] observations_db, double[] weights)
{
    ValidationHelper.ValidateObservationDb(observations_db, 0, mModel.SymbolCount);

    int K = observations_db.Length;

    mLogWeights = new double[K];
    if (weights != null)
    {
        for (int k = 0; k < K; ++k)
        {
            mLogWeights[k] = System.Math.Log(weights[k]);
        }
    }

    int N = mModel.StateCount;
    double lnK = System.Math.Log(K);

    // NOTE(review): these are written to below; presumably they are the
    // model's own arrays so the updates take effect in place - confirm.
    double[,] logA = mModel.LogTransitionMatrix;
    double[,] logB = mModel.LogEmissionMatrix;
    double[] logPi = mModel.LogProbabilityVector;

    int M = mModel.SymbolCount;

    // Per-sequence storage for the gamma and ksi values
    mLogGamma = new double[K][,];
    mLogKsi = new double[K][][,];

    for (int k = 0; k < K; ++k)
    {
        int T = observations_db[k].Length;

        mLogGamma[k] = new double[T, N];
        mLogKsi[k] = new double[T][,];

        for (int t = 0; t < T; ++t)
        {
            mLogKsi[k][t] = new double[N, N];
        }
    }

    // Forward/backward buffers sized for the longest sequence and reused
    int maxT = observations_db.Max(seq => seq.Length);
    double[,] lnfwd = new double[maxT, N];
    double[,] lnbwd = new double[maxT, N];

    // Initialize the model log-likelihoods
    double newLogLikelihood = Double.NegativeInfinity;
    double oldLogLikelihood = Double.NegativeInfinity;

    int iteration = 0;
    double deltaLogLikelihood = 0;
    bool should_continue = true;

    do // Until convergence or max iterations is reached
    {
        oldLogLikelihood = newLogLikelihood;

        // Sum of the per-sequence log-likelihoods of this iteration. It is
        // accumulated over ALL sequences; resetting the running value for
        // every sequence would let only the last one drive convergence.
        double totalLogLikelihood = 0;

        for (int k = 0; k < K; ++k)
        {
            int[] observations = observations_db[k];
            double[,] logGamma = mLogGamma[k];
            double[][,] logKsi = mLogKsi[k];
            double w = mLogWeights[k];
            int T = observations.Length;

            ForwardBackwardAlgorithm.LogForward(logA, logB, logPi, observations, lnfwd);
            ForwardBackwardAlgorithm.LogBackward(logA, logB, logPi, observations, lnbwd);

            // Compute Gamma values
            for (int t = 0; t < T; ++t)
            {
                double lnsum = double.NegativeInfinity;

                for (int i = 0; i < N; ++i)
                {
                    logGamma[t, i] = lnfwd[t, i] + lnbwd[t, i] + w;
                    lnsum = LogHelper.LogSum(lnsum, logGamma[t, i]);
                }

                // Normalise unless everything is log(0)
                if (lnsum != Double.NegativeInfinity)
                {
                    for (int i = 0; i < N; ++i)
                    {
                        logGamma[t, i] = logGamma[t, i] - lnsum;
                    }
                }
            }

            // Compute Ksi values
            for (int t = 0; t < T - 1; ++t)
            {
                double lnsum = double.NegativeInfinity;
                int x = observations[t + 1];

                for (int i = 0; i < N; ++i)
                {
                    for (int j = 0; j < N; ++j)
                    {
                        logKsi[t][i, j] = lnfwd[t, i] + logA[i, j] + lnbwd[t + 1, j] + logB[j, x] + w;
                        lnsum = LogHelper.LogSum(lnsum, logKsi[t][i, j]);
                    }
                }

                // Same guard as for gamma: subtracting -infinity from
                // -infinity would turn every entry into NaN.
                if (lnsum != double.NegativeInfinity)
                {
                    for (int i = 0; i < N; ++i)
                    {
                        for (int j = 0; j < N; ++j)
                        {
                            logKsi[t][i, j] = logKsi[t][i, j] - lnsum;
                        }
                    }
                }
            }

            // Log-likelihood of this sequence: log-sum of the last forward row
            double sequenceLogLikelihood = Double.NegativeInfinity;
            for (int i = 0; i < N; ++i)
            {
                sequenceLogLikelihood = LogHelper.LogSum(sequenceLogLikelihood, lnfwd[T - 1, i]);
            }

            totalLogLikelihood += sequenceLogLikelihood;
        }

        // Average over the sequences
        newLogLikelihood = totalLogLikelihood / K;

        deltaLogLikelihood = newLogLikelihood - oldLogLikelihood;
        iteration++;

        if (ShouldTerminate(deltaLogLikelihood, iteration))
        {
            should_continue = false;
        }
        else
        {
            // update pi
            for (int i = 0; i < N; ++i)
            {
                double lnsum = double.NegativeInfinity;

                for (int k = 0; k < K; ++k)
                {
                    lnsum = LogHelper.LogSum(lnsum, mLogGamma[k][0, i]);
                }

                logPi[i] = lnsum - lnK;
            }

            // update A
            for (int i = 0; i < N; ++i)
            {
                for (int j = 0; j < N; ++j)
                {
                    double lndenom = double.NegativeInfinity;
                    double lnnum = double.NegativeInfinity;

                    for (int k = 0; k < K; ++k)
                    {
                        int T = observations_db[k].Length;

                        for (int t = 0; t < T - 1; ++t)
                        {
                            lnnum = LogHelper.LogSum(lnnum, mLogKsi[k][t][i, j]);
                            lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);
                        }
                    }

                    // Equal terms (including both at -infinity) map to 0
                    logA[i, j] = (lnnum == lndenom) ? 0 : lnnum - lndenom;
                }
            }

            // update B
            for (int i = 0; i < N; ++i)
            {
                for (int m = 0; m < M; ++m)
                {
                    double lndenom = double.NegativeInfinity;
                    double lnnum = double.NegativeInfinity;

                    for (int k = 0; k < K; ++k)
                    {
                        int[] observations = observations_db[k];
                        int T = observations.Length;

                        for (int t = 0; t < T; ++t)
                        {
                            lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);

                            if (observations[t] == m)
                            {
                                lnnum = LogHelper.LogSum(lnnum, mLogGamma[k][t, i]);
                            }
                        }
                    }

                    logB[i, m] = lnnum - lndenom;
                }
            }
        }
    } while (should_continue);

    return newLogLikelihood;
}
/// <summary>
///   Runs Baum-Welch re-estimation on a set of univariate, real-valued
///   observation sequences until <c>ShouldTerminate</c> reports that the
///   loop should stop. The model dimension is asserted to be 1.
/// </summary>
/// <param name="observations_db">The observation sequences.</param>
/// <param name="weights">
///   Optional per-sequence weights; their logarithm is added to the
///   unnormalised gamma and ksi values. When null, a log-weight of 0 is used.
/// </param>
/// <returns>
///   The mean, over all sequences, of the per-sequence log-likelihood
///   computed in the last iteration.
/// </returns>
public double Run(double[][] observations_db, double[] weights)
{
    DiagnosticsHelper.Assert(mModel.Dimension == 1);

    int K = observations_db.Length;

    mLogWeights = new double[K];
    if (weights != null)
    {
        for (int k = 0; k < K; ++k)
        {
            mLogWeights[k] = System.Math.Log(weights[k]);
        }
    }

    // All observations flattened into one array, plus one weight per
    // observation; both are handed to the emission models below.
    double[] observations_db_1d = MathHelper.Concatenate<double>(observations_db);
    double[] Bweights = new double[observations_db_1d.Length];

    int N = mModel.StateCount;
    double lnK = System.Math.Log(K);

    // NOTE(review): these are written to below; presumably they are the
    // model's own objects so the updates take effect in place - confirm.
    double[,] logA = mModel.LogTransitionMatrix;
    DistributionModel[] probB = mModel.EmissionModels;
    double[] logPi = mModel.LogProbabilityVector;

    // Per-sequence storage for the gamma and ksi values
    mLogGamma = new double[K][,];
    mLogKsi = new double[K][][,];

    for (int k = 0; k < K; ++k)
    {
        int T = observations_db[k].Length;

        mLogGamma[k] = new double[T, N];
        mLogKsi[k] = new double[T][,];

        for (int t = 0; t < T; ++t)
        {
            mLogKsi[k][t] = new double[N, N];
        }
    }

    // Forward/backward buffers sized for the longest sequence and reused
    int maxT = observations_db.Max(seq => seq.Length);
    double[,] lnfwd = new double[maxT, N];
    double[,] lnbwd = new double[maxT, N];

    // Initialize the model log-likelihoods
    double newLogLikelihood = Double.NegativeInfinity;
    double oldLogLikelihood = Double.NegativeInfinity;

    int iteration = 0;
    double deltaLogLikelihood = 0;
    bool should_continue = true;

    do // Until convergence or max iterations is reached
    {
        oldLogLikelihood = newLogLikelihood;

        // Sum of the per-sequence log-likelihoods of this iteration. It is
        // accumulated over ALL sequences; resetting the running value for
        // every sequence would let only the last one drive convergence.
        double totalLogLikelihood = 0;

        for (int k = 0; k < K; ++k)
        {
            double[] observations = observations_db[k];
            double[,] logGamma = mLogGamma[k];
            double[][,] logKsi = mLogKsi[k];
            double w = mLogWeights[k];
            int T = observations.Length;

            ForwardBackwardAlgorithm.LogForward(logA, probB, logPi, observations, lnfwd);
            ForwardBackwardAlgorithm.LogBackward(logA, probB, logPi, observations, lnbwd);

            // Compute Gamma values
            for (int t = 0; t < T; ++t)
            {
                double lnsum = double.NegativeInfinity;

                for (int i = 0; i < N; ++i)
                {
                    logGamma[t, i] = lnfwd[t, i] + lnbwd[t, i] + w;
                    lnsum = LogHelper.LogSum(lnsum, logGamma[t, i]);
                }

                // Normalise unless everything is log(0)
                if (lnsum != Double.NegativeInfinity)
                {
                    for (int i = 0; i < N; ++i)
                    {
                        logGamma[t, i] = logGamma[t, i] - lnsum;
                    }
                }
            }

            // Compute Ksi values
            for (int t = 0; t < T - 1; ++t)
            {
                double lnsum = double.NegativeInfinity;
                double x = observations[t + 1];

                for (int i = 0; i < N; ++i)
                {
                    for (int j = 0; j < N; ++j)
                    {
                        logKsi[t][i, j] = lnfwd[t, i] + logA[i, j] + lnbwd[t + 1, j]
                            + MathHelper.LogProbabilityFunction(probB[j], x) + w;
                        lnsum = LogHelper.LogSum(lnsum, logKsi[t][i, j]);
                    }
                }

                if (lnsum != double.NegativeInfinity)
                {
                    for (int i = 0; i < N; ++i)
                    {
                        for (int j = 0; j < N; ++j)
                        {
                            logKsi[t][i, j] = logKsi[t][i, j] - lnsum;
                        }
                    }
                }
            }

            // Log-likelihood of this sequence: log-sum of the last forward row
            double sequenceLogLikelihood = Double.NegativeInfinity;
            for (int i = 0; i < N; ++i)
            {
                sequenceLogLikelihood = LogHelper.LogSum(sequenceLogLikelihood, lnfwd[T - 1, i]);
            }

            totalLogLikelihood += sequenceLogLikelihood;
        }

        // Average over the sequences
        newLogLikelihood = totalLogLikelihood / K;

        deltaLogLikelihood = newLogLikelihood - oldLogLikelihood;
        iteration++;

        if (ShouldTerminate(deltaLogLikelihood, iteration))
        {
            should_continue = false;
        }
        else
        {
            // update pi
            for (int i = 0; i < N; ++i)
            {
                double lnsum = double.NegativeInfinity;

                for (int k = 0; k < K; ++k)
                {
                    lnsum = LogHelper.LogSum(lnsum, mLogGamma[k][0, i]);
                }

                logPi[i] = lnsum - lnK;
            }

            // update A
            for (int i = 0; i < N; ++i)
            {
                for (int j = 0; j < N; ++j)
                {
                    double lndenom = double.NegativeInfinity;
                    double lnnum = double.NegativeInfinity;

                    for (int k = 0; k < K; ++k)
                    {
                        int T = observations_db[k].Length;

                        for (int t = 0; t < T - 1; ++t)
                        {
                            lnnum = LogHelper.LogSum(lnnum, mLogKsi[k][t][i, j]);
                            lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);
                        }
                    }

                    // Equal terms (including both at -infinity) map to 0
                    logA[i, j] = (lnnum == lndenom) ? 0 : lnnum - lndenom;
                }
            }

            // update B: each state's emission model is handed all
            // observations, weighted by that state's normalised gamma.
            for (int i = 0; i < N; ++i)
            {
                double lnsum = double.NegativeInfinity;

                // w walks the flattened observation array in step with (k, t)
                for (int k = 0, w = 0; k < K; ++k)
                {
                    double[] observations = observations_db[k];
                    int T = observations.Length;

                    for (int t = 0; t < T; ++t, ++w)
                    {
                        Bweights[w] = mLogGamma[k][t, i];
                        lnsum = LogHelper.LogSum(lnsum, Bweights[w]);
                    }
                }

                if (lnsum != double.NegativeInfinity)
                {
                    for (int w = 0; w < Bweights.Length; ++w)
                    {
                        Bweights[w] = Bweights[w] - lnsum;
                    }
                }

                // Leave the log domain before passing the weights on
                for (int w = 0; w < Bweights.Length; ++w)
                {
                    Bweights[w] = System.Math.Exp(Bweights[w]);
                }

                probB[i].Process(observations_db_1d, Bweights);
            }
        }
    } while (should_continue);

    return newLogLikelihood;
}
/// <summary>
///   Computes the gradient entries that belong to a single factor
///   potential and writes them into <paramref name="gradient"/>.
/// </summary>
/// <param name="factor">The factor potential whose parameters are processed.</param>
/// <param name="inputs">The training input sequences.</param>
/// <param name="outputs">The training output label for each input.</param>
/// <param name="lnZx">Per-sample value subtracted in the second marginal sum.</param>
/// <param name="lnZxy">Per-sample value subtracted in the first marginal sum.</param>
/// <param name="gradient">
///   The gradient vector; only the entries in the factor's parameter
///   range (offset .. offset + count) are written.
/// </param>
private void InnerGradient(FactorPotential<T> factor, T[][] inputs, int[] outputs,
    double[] lnZx, double[] lnZxy, double[] gradient)
{
    int factorIndex = factor.Index;
    int samples = inputs.Length;

    // Forward and backward matrices for every input, reused by
    // all feature marginal computations of this factor.
    double[][,] lnFwds = new double[samples][,];
    double[][,] lnBwds = new double[samples][,];

    for (int i = 0; i < samples; i++)
    {
        lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
        lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
    }

    double[] marginals = new double[function.Outputs];

    // For each feature in the factor potential function
    int first = factor.FactorParameters.Offset;
    int end = first + factor.FactorParameters.Count;

    for (int k = first; k < end; k++)
    {
        IFeature<T> feature = function.Features[k];
        double parameter = function.Weights[k];

        // Infinite weights get a zero gradient entry
        if (Double.IsInfinity(parameter))
        {
            gradient[k] = 0;
            continue;
        }

        // The two marginal sums of the gradient, as given in eq. 1.52 of
        // Sutton, McCallum; "An introduction to Conditional Random Fields
        // for Relational Learning". Both are accumulated in the log domain.
        double lnsum1 = Double.NegativeInfinity;
        double lnsum2 = Double.NegativeInfinity;

        // For each training sample (sequence)
        for (int i = 0; i < samples; i++)
        {
            T[] x = inputs[i];   // training input
            int y = outputs[i];  // training output

            // Start with all outputs at log(0) ...
            for (int j = 0; j < marginals.Length; j++)
            {
                marginals[j] = Double.NegativeInfinity;
            }

            // ... and, assuming each factor is responsible for a single
            // output label, fill in only the entry of the current factor.
            marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

            // First term: marginal p(w|x,y) of the clamped CRF (eq. 1.46).
            // Equal operands contribute exactly 0 instead of their difference.
            double clamped = marginals[y];
            double firstTerm = (clamped == lnZxy[i]) ? 0 : clamped - lnZxy[i];
            lnsum1 = Special.LogSum(lnsum1, firstTerm);

            // Second term: marginal p(w,y|x) of the fully-observed CRF
            for (int j = 0; j < marginals.Length; j++)
            {
                lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);
            }

            Accord.Diagnostics.Debug.Assert(!marginals.HasNaN());
            Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum1));
            Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum2));
        }

        // Leave the log domain and take the difference; equal terms
        // are forced to exactly zero.
        double sum1 = Math.Exp(lnsum1);
        double sum2 = Math.Exp(lnsum2);
        double derivative = (sum1 == sum2) ? 0 : sum1 - sum2;

        Accord.Diagnostics.Debug.Assert(!Double.IsNaN(derivative));

        // Include the regularization derivative if required
        if (sigma != 0)
        {
            derivative -= parameter / sigma;
        }

        gradient[k] = -derivative;
    }
}