public void LogBackwardTest()
{
    HiddenMarkovModel hmm = Accord.Tests.Statistics.Models.Markov.
        ForwardBackwardAlgorithmTest.CreateModel2();

    int[] observations = { 0, 1, 1, 0 };

    double logLikelihood;
    double[,] expected = Matrix.Log(
        ForwardBackwardAlgorithm.Backward(hmm, observations));

    double[,] actual = ForwardBackwardAlgorithm.LogBackward(hmm,
        observations, out logLikelihood);

    Assert.IsTrue(expected.IsEqual(actual, 1e-10));

    foreach (double e in actual)
        Assert.IsFalse(double.IsNaN(e));

    double p = System.Math.Exp(logLikelihood);
    Assert.AreEqual(0.054814695, p, 1e-8);
    Assert.IsFalse(double.IsNaN(p));
}
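For reference, the quantity these tests exercise is the standard backward recursion; the Log* variants compute the same recursion in the log domain, replacing products with sums and sums with log-sum-exp:

$$
\beta_T(i) = 1, \qquad
\beta_t(i) = \sum_{j=1}^{N} a_{ij}\, b_j(o_{t+1})\, \beta_{t+1}(j),
\qquad
P(O \mid \lambda) = \sum_{i=1}^{N} \pi_i\, b_i(o_1)\, \beta_1(i),
$$

where $a_{ij}$ are transition probabilities, $b_j(\cdot)$ emission probabilities, and $\pi_i$ initial-state probabilities. The test's `System.Math.Exp(logLikelihood)` is exactly this $P(O \mid \lambda)$, recovered from $\beta_1$.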
/// <summary>
///   Computes the forward and backward probabilities matrices
///   for a given observation referenced by its index in the
///   input training data.
/// </summary>
///
/// <param name="index">The index of the observation in the input training data.</param>
/// <param name="lnFwd">Returns the computed forward probabilities matrix.</param>
/// <param name="lnBwd">Returns the computed backward probabilities matrix.</param>
///
protected override void ComputeForwardBackward(int index, double[,] lnFwd, double[,] lnBwd)
{
    int states = model.States;
    int T = discreteObservations[index].Length;

    System.Diagnostics.Debug.Assert(lnBwd.GetLength(0) >= T);
    System.Diagnostics.Debug.Assert(lnBwd.GetLength(1) == states);
    System.Diagnostics.Debug.Assert(lnFwd.GetLength(0) >= T);
    System.Diagnostics.Debug.Assert(lnFwd.GetLength(1) == states);

    ForwardBackwardAlgorithm.LogForward(model, discreteObservations[index], lnFwd);
    ForwardBackwardAlgorithm.LogBackward(model, discreteObservations[index], lnBwd);
}
/// <summary>
///   Computes the forward and backward probabilities matrices
///   for a given observation referenced by its index in the
///   input training data.
/// </summary>
/// <param name="index">The index of the observation in the input training data.</param>
/// <param name="lnFwd">Returns the computed forward probabilities matrix.</param>
/// <param name="lnBwd">Returns the computed backward probabilities matrix.</param>
///
protected void ComputeForwardBackward(int index, double[,] lnFwd, double[,] lnBwd)
{
    int states = Model.States;
    int T = vectorObservations[index].Length;

    Accord.Diagnostics.Debug.Assert(lnBwd.GetLength(0) >= T);
    Accord.Diagnostics.Debug.Assert(lnBwd.GetLength(1) == states);
    Accord.Diagnostics.Debug.Assert(lnFwd.GetLength(0) >= T);
    Accord.Diagnostics.Debug.Assert(lnFwd.GetLength(1) == states);

    ForwardBackwardAlgorithm.LogForward(Model, vectorObservations[index], lnFwd);
    ForwardBackwardAlgorithm.LogBackward(Model, vectorObservations[index], lnBwd);
}
public void LogForwardBackwardGenericTest()
{
    var discreteModel = CreateModel1();
    var genericModel = CreateModel4();

    int[] discreteObservations = { 2, 2, 1, 0 };
    double[][] genericObservations =
    {
        new double[] { 2 }, new double[] { 2 },
        new double[] { 1 }, new double[] { 0 }
    };

    double discreteFwdLogLikelihood;
    double[,] discreteFwd = ForwardBackwardAlgorithm.LogForward(discreteModel,
        discreteObservations, out discreteFwdLogLikelihood);

    double discreteBwdLogLikelihood;
    double[,] discreteBwd = ForwardBackwardAlgorithm.LogBackward(discreteModel,
        discreteObservations, out discreteBwdLogLikelihood);

    double genericFwdLogLikelihood;
    double[,] genericFwd = ForwardBackwardAlgorithm.LogForward(genericModel,
        genericObservations, out genericFwdLogLikelihood);

    double genericBwdLogLikelihood;
    double[,] genericBwd = ForwardBackwardAlgorithm.LogBackward(genericModel,
        genericObservations, out genericBwdLogLikelihood);

    Assert.AreEqual(discreteFwdLogLikelihood, discreteBwdLogLikelihood);
    Assert.AreEqual(genericFwdLogLikelihood, genericBwdLogLikelihood);
    Assert.AreEqual(discreteFwdLogLikelihood, genericBwdLogLikelihood);

    for (int i = 0; i < discreteFwd.GetLength(0); i++)
    {
        for (int j = 0; j < discreteFwd.GetLength(1); j++)
        {
            Assert.AreEqual(discreteFwd[i, j], genericFwd[i, j]);
            Assert.AreEqual(discreteBwd[i, j], genericBwd[i, j]);
        }
    }
}
public void LogBackwardTest2()
{
    HiddenMarkovModel hmm = Accord.Tests.Statistics.Models.Markov.
        ForwardBackwardAlgorithmTest.CreateModel3();

    int[] observations = { 0, 0, 1, 1 };

    double[,] expected = Matrix.Log(
        ForwardBackwardAlgorithm.Backward(hmm, observations));

    double[,] actual = ForwardBackwardAlgorithm.LogBackward(hmm, observations);

    Assert.IsTrue(expected.IsEqual(actual, 1e-10));

    foreach (double p in actual)
        Assert.IsFalse(double.IsNaN(p));
}
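In practice the forward and backward matrices are combined into posterior state probabilities, which is exactly what the Baum-Welch implementations further below do when computing gamma. A minimal sketch using the same overloads exercised by the tests above (`hmm` and `observations` as in LogBackwardTest2; `gamma00` is an illustrative local name, not from the test suite):

// Sketch: the posterior probability of being in state i at time t is
// exp(lnFwd[t, i] + lnBwd[t, i] - logLikelihood).
double logLikelihood;
double[,] lnFwd = ForwardBackwardAlgorithm.LogForward(hmm, observations, out logLikelihood);
double[,] lnBwd = ForwardBackwardAlgorithm.LogBackward(hmm, observations);

// Posterior of state 0 at time 0; summing over all states at a fixed t gives 1.
double gamma00 = System.Math.Exp(lnFwd[0, 0] + lnBwd[0, 0] - logLikelihood);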
/// <summary>
///   Computes the gradient using the
///   input/outputs stored in this object.
/// </summary>
///
/// <returns>The value of the gradient vector for the given parameters.</returns>
///
protected double[] Gradient()
{
    // Localize thread locals
    var logLikelihoods = this.logLikelihoods.Value;
    var inputs = this.inputs.Value;
    var outputs = this.outputs.Value;
    var lnZx = this.lnZx.Value;
    var lnZxy = this.lnZxy.Value;
    var gradient = this.gradient.Value;

    double error = 0;

    // The previous call to Objective could have computed
    // the log-likelihoods for all input values. However, if
    // this hasn't been the case, compute them now:

    if (logLikelihoods == null)
        model.LogLikelihood(inputs, outputs, out logLikelihoods);

    // Compute the partition function using the previously
    // computed likelihoods. Also compute the total error.

    // For each x, compute lnZ(x) and lnZ(x,y)
    for (int i = 0; i < inputs.Length; i++)
    {
        double[] lli = logLikelihoods[i];

        // Compute the marginal likelihood
        double sum = Double.NegativeInfinity;
        for (int j = 0; j < lli.Length; j++)
            sum = Special.LogSum(sum, lli[j]);

        lnZx[i] = sum;
        lnZxy[i] = lli[outputs[i]];

        // Compute and return the negative
        // log-likelihood as the error function
        error -= lnZxy[i] - lnZx[i];
    }

    // Now start computing the gradient w.r.t. the
    // feature functions. Each feature function belongs
    // to a factor potential function, so:

    // For each clique potential (factor potential function)
#if DEBUG
    for (int c = 0; c < function.Factors.Length; c++)
#else
    Parallel.For(0, function.Factors.Length, c =>
#endif
    {
        FactorPotential<T> factor = function.Factors[c];

        int factorIndex = factor.Index;

        // Compute all forward and backward matrices to be
        // used in the feature functions marginal computations.
        var lnFwds = new double[inputs.Length][,];
        var lnBwds = new double[inputs.Length][,];
        for (int i = 0; i < inputs.Length; i++)
        {
            lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
            lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
        }

        double[] marginals = new double[function.Outputs];

        // For each feature in the factor potential function
        int end = factor.ParameterIndex + factor.ParameterCount;
        for (int k = factor.ParameterIndex; k < end; k++)
        {
            IFeature<T> feature = function.Features[k];
            double parameter = function.Weights[k];

            if (Double.IsInfinity(parameter))
            {
                gradient[k] = 0;
                continue;
            }

            // Compute the two marginal sums for the gradient calculation
            // as given in eq. 1.52 of Sutton, McCallum; "An Introduction to
            // Conditional Random Fields for Relational Learning". The sums
            // will be computed in the log domain for numerical stability.

            double lnsum1 = Double.NegativeInfinity;
            double lnsum2 = Double.NegativeInfinity;

            // For each training sample (sequence)
            for (int i = 0; i < inputs.Length; i++)
            {
                T[] x = inputs[i];  // training input
                int y = outputs[i]; // training output

                // Compute marginals for all possible outputs
                for (int j = 0; j < marginals.Length; j++)
                    marginals[j] = Double.NegativeInfinity;

                // However, making the assumption that each factor is responsible for only
                // one output label, we can compute the marginal only for the current factor
                marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

                // The first term contains a marginal probability p(w|x,y), which is
                // exactly a marginal distribution of the clamped CRF (eq. 1.46).
                lnsum1 = Special.LogSum(lnsum1, marginals[y] - lnZxy[i]);

                // The second term contains a different marginal p(w,y|x), which is the
                // same marginal probability required in a fully-observed CRF.
                for (int j = 0; j < marginals.Length; j++)
                    lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);

#if DEBUG
                if (Double.IsNaN(lnsum1) || Double.IsNaN(lnsum2))
                    throw new Exception();
#endif
            }

            // Compute the current derivative
            double sum1 = Math.Exp(lnsum1);
            double sum2 = Math.Exp(lnsum2);
            double derivative = sum1 - sum2;

            if (sum1 == sum2)
                derivative = 0;

#if DEBUG
            if (Double.IsNaN(derivative))
                throw new Exception();
#endif

            // Include the regularization derivative if required
            if (sigma != 0)
                derivative -= parameter / sigma;

            gradient[k] = -derivative;
        }
    }
#if !DEBUG
    );
#endif

    // Reset log-likelihoods so they are recomputed in the next run,
    // either by the Objective function or by the Gradient calculation.
    this.logLikelihoods.Value = null;
    this.error.Value = error;

    return gradient; // return the gradient.
}
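For orientation, the two sums correspond to the usual decomposition of the regularized conditional log-likelihood gradient that the comments cite (eq. 1.52 in Sutton & McCallum). Written loosely for a single feature $f_k$ with weight $\theta_k$, summing over training pairs $(x^{(i)}, y^{(i)})$:

$$
\frac{\partial \ell}{\partial \theta_k}
= \sum_{i} \mathbb{E}_{p(w \mid x^{(i)},\, y^{(i)})}\!\left[f_k\right]
- \sum_{i} \mathbb{E}_{p(w,\, y \mid x^{(i)})}\!\left[f_k\right]
- \frac{\theta_k}{\sigma},
$$

where the first expectation is taken under the clamped distribution (`lnsum1`, normalized by `lnZxy`), the second under the unclamped one (`lnsum2`, normalized by `lnZx`), and the code's `sigma` plays the role of the Gaussian prior's variance $\sigma^2$. The optimizer minimizes the negative log-likelihood, hence `gradient[k] = -derivative`.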
public double Run(int[][] observations_db, double[] weights)
{
    ValidationHelper.ValidateObservationDb(observations_db, 0, mModel.SymbolCount);

    int K = observations_db.Length;

    mLogWeights = new double[K];
    if (weights != null)
    {
        for (int k = 0; k < K; ++k)
        {
            mLogWeights[k] = System.Math.Log(weights[k]);
        }
    }

    int N = mModel.StateCount;
    double lnK = System.Math.Log(K);

    double[,] logA = mModel.LogTransitionMatrix;
    double[,] logB = mModel.LogEmissionMatrix;
    double[] logPi = mModel.LogProbabilityVector;

    int M = mModel.SymbolCount;

    mLogGamma = new double[K][,];
    mLogKsi = new double[K][][,];
    for (int k = 0; k < K; ++k)
    {
        int T = observations_db[k].Length;
        mLogGamma[k] = new double[T, N];
        mLogKsi[k] = new double[T][,];
        for (int t = 0; t < T; ++t)
        {
            mLogKsi[k][t] = new double[N, N];
        }
    }

    int maxT = observations_db.Max(x => x.Length);
    double[,] lnfwd = new double[maxT, N];
    double[,] lnbwd = new double[maxT, N];

    // Initialize the model log-likelihoods
    double newLogLikelihood = Double.NegativeInfinity;
    double oldLogLikelihood = Double.NegativeInfinity;

    int iteration = 0;
    double deltaLogLikelihood = 0;
    bool should_continue = true;

    do // Until convergence or the maximum number of iterations is reached
    {
        oldLogLikelihood = newLogLikelihood;

        // The total log-likelihood of the (independent) sequences is the
        // sum of the per-sequence log-likelihoods, so it must be
        // accumulated across all k, not reset for each sequence.
        newLogLikelihood = 0;

        for (int k = 0; k < K; ++k)
        {
            int[] observations = observations_db[k];
            double[,] logGamma = mLogGamma[k];
            double[][,] logKsi = mLogKsi[k];
            double w = mLogWeights[k];
            int T = observations.Length;

            ForwardBackwardAlgorithm.LogForward(logA, logB, logPi, observations, lnfwd);
            ForwardBackwardAlgorithm.LogBackward(logA, logB, logPi, observations, lnbwd);

            // Compute the gamma values
            for (int t = 0; t < T; ++t)
            {
                double lnsum = double.NegativeInfinity;
                for (int i = 0; i < N; ++i)
                {
                    logGamma[t, i] = lnfwd[t, i] + lnbwd[t, i] + w;
                    lnsum = LogHelper.LogSum(lnsum, logGamma[t, i]);
                }

                if (lnsum != Double.NegativeInfinity)
                {
                    for (int i = 0; i < N; ++i)
                    {
                        logGamma[t, i] = logGamma[t, i] - lnsum;
                    }
                }
            }

            // Compute the ksi values
            for (int t = 0; t < T - 1; ++t)
            {
                double lnsum = double.NegativeInfinity;
                int x = observations[t + 1];

                for (int i = 0; i < N; ++i)
                {
                    for (int j = 0; j < N; ++j)
                    {
                        logKsi[t][i, j] = lnfwd[t, i] + logA[i, j] + lnbwd[t + 1, j] + logB[j, x] + w;
                        lnsum = LogHelper.LogSum(lnsum, logKsi[t][i, j]);
                    }
                }

                for (int i = 0; i < N; ++i)
                {
                    for (int j = 0; j < N; ++j)
                    {
                        logKsi[t][i, j] = logKsi[t][i, j] - lnsum;
                    }
                }
            }

            // The log-likelihood of this sequence is the log-sum of the
            // forward variables at the last time step.
            double sequenceLogLikelihood = Double.NegativeInfinity;
            for (int i = 0; i < N; ++i)
            {
                sequenceLogLikelihood = LogHelper.LogSum(sequenceLogLikelihood, lnfwd[T - 1, i]);
            }
            newLogLikelihood += sequenceLogLikelihood;
        }

        newLogLikelihood /= K; // average log-likelihood per sequence

        deltaLogLikelihood = newLogLikelihood - oldLogLikelihood;
        iteration++;
        //Console.WriteLine("Iteration: {0}", iteration);

        if (ShouldTerminate(deltaLogLikelihood, iteration))
        {
            should_continue = false;
        }
        else
        {
            // Update pi
            for (int i = 0; i < N; ++i)
            {
                double lnsum = double.NegativeInfinity;
                for (int k = 0; k < K; ++k)
                {
                    lnsum = LogHelper.LogSum(lnsum, mLogGamma[k][0, i]);
                }
                logPi[i] = lnsum - lnK;
            }

            // Update A
            for (int i = 0; i < N; ++i)
            {
                for (int j = 0; j < N; ++j)
                {
                    double lndenom = double.NegativeInfinity;
                    double lnnum = double.NegativeInfinity;
                    for (int k = 0; k < K; ++k)
                    {
                        int T = observations_db[k].Length;
                        for (int t = 0; t < T - 1; ++t)
                        {
                            lnnum = LogHelper.LogSum(lnnum, mLogKsi[k][t][i, j]);
                            lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);
                        }
                    }
                    logA[i, j] = (lnnum == lndenom) ? 0 : lnnum - lndenom;
                }
            }

            // Update B
            for (int i = 0; i < N; ++i)
            {
                for (int m = 0; m < M; ++m)
                {
                    double lndenom = double.NegativeInfinity;
                    double lnnum = double.NegativeInfinity;
                    for (int k = 0; k < K; ++k)
                    {
                        int[] observations = observations_db[k];
                        int T = observations.Length;
                        for (int t = 0; t < T; ++t)
                        {
                            lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);
                            if (observations[t] == m)
                            {
                                lnnum = LogHelper.LogSum(lnnum, mLogGamma[k][t, i]);
                            }
                        }
                    }
                    logB[i, m] = lnnum - lndenom;
                }
            }
        }
    } while (should_continue);

    return newLogLikelihood;
}
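The quantities computed above are the standard Baum-Welch statistics and re-estimation formulas, shown here in probability space (the code works with their logarithms and also folds in the per-sequence log-weight `w`):

$$
\gamma_t(i) = \frac{\alpha_t(i)\,\beta_t(i)}{\sum_{j} \alpha_t(j)\,\beta_t(j)}, \qquad
\xi_t(i,j) = \frac{\alpha_t(i)\, a_{ij}\, b_j(o_{t+1})\, \beta_{t+1}(j)}
{\sum_{i'}\sum_{j'} \alpha_t(i')\, a_{i'j'}\, b_{j'}(o_{t+1})\, \beta_{t+1}(j')},
$$

$$
\hat{\pi}_i = \frac{1}{K} \sum_{k} \gamma^{(k)}_1(i), \qquad
\hat{a}_{ij} = \frac{\sum_{k}\sum_{t=1}^{T_k-1} \xi^{(k)}_t(i,j)}{\sum_{k}\sum_{t=1}^{T_k-1} \gamma^{(k)}_t(i)}, \qquad
\hat{b}_i(m) = \frac{\sum_{k}\sum_{t:\, o^{(k)}_t = m} \gamma^{(k)}_t(i)}{\sum_{k}\sum_{t=1}^{T_k} \gamma^{(k)}_t(i)},
$$

where $k$ indexes the $K$ training sequences and $T_k$ is the length of sequence $k$.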
/// <summary>
///   Runs the Baum-Welch learning algorithm for a model with
///   univariate (one-dimensional) continuous emission densities.
/// </summary>
/// <param name="observations_db">The training observation sequences.</param>
/// <param name="weights">Optional weights for each training sequence.</param>
/// <returns>The average log-likelihood of the model after learning.</returns>
public double Run(double[][] observations_db, double[] weights)
{
    DiagnosticsHelper.Assert(mModel.Dimension == 1);

    int K = observations_db.Length;

    mLogWeights = new double[K];
    if (weights != null)
    {
        for (int k = 0; k < K; ++k)
        {
            mLogWeights[k] = System.Math.Log(weights[k]);
        }
    }

    double[] observations_db_1d = MathHelper.Concatenate<double>(observations_db);
    double[] Bweights = new double[observations_db_1d.Length];

    int N = mModel.StateCount;
    double lnK = System.Math.Log(K);

    double[,] logA = mModel.LogTransitionMatrix;
    DistributionModel[] probB = mModel.EmissionModels;
    double[] logPi = mModel.LogProbabilityVector;

    int M = mModel.SymbolCount;

    mLogGamma = new double[K][,];
    mLogKsi = new double[K][][,];
    for (int k = 0; k < K; ++k)
    {
        int T = observations_db[k].Length;
        mLogGamma[k] = new double[T, N];
        mLogKsi[k] = new double[T][,];
        for (int t = 0; t < T; ++t)
        {
            mLogKsi[k][t] = new double[N, N];
        }
    }

    int maxT = observations_db.Max(x => x.Length);
    double[,] lnfwd = new double[maxT, N];
    double[,] lnbwd = new double[maxT, N];

    // Initialize the model log-likelihoods
    double newLogLikelihood = Double.NegativeInfinity;
    double oldLogLikelihood = Double.NegativeInfinity;

    int iteration = 0;
    double deltaLogLikelihood = 0;
    bool should_continue = true;

    do // Until convergence or the maximum number of iterations is reached
    {
        oldLogLikelihood = newLogLikelihood;

        // The total log-likelihood of the (independent) sequences is the
        // sum of the per-sequence log-likelihoods, so it must be
        // accumulated across all k, not reset for each sequence.
        newLogLikelihood = 0;

        for (int k = 0; k < K; ++k)
        {
            double[] observations = observations_db[k];
            double[,] logGamma = mLogGamma[k];
            double[][,] logKsi = mLogKsi[k];
            double w = mLogWeights[k];
            int T = observations.Length;

            ForwardBackwardAlgorithm.LogForward(logA, probB, logPi, observations, lnfwd);
            ForwardBackwardAlgorithm.LogBackward(logA, probB, logPi, observations, lnbwd);

            // Compute the gamma values
            for (int t = 0; t < T; ++t)
            {
                double lnsum = double.NegativeInfinity;
                for (int i = 0; i < N; ++i)
                {
                    logGamma[t, i] = lnfwd[t, i] + lnbwd[t, i] + w;
                    lnsum = LogHelper.LogSum(lnsum, logGamma[t, i]);
                }

                if (lnsum != Double.NegativeInfinity)
                {
                    for (int i = 0; i < N; ++i)
                    {
                        logGamma[t, i] = logGamma[t, i] - lnsum;
                    }
                }
            }

            // Compute the ksi values
            for (int t = 0; t < T - 1; ++t)
            {
                double lnsum = double.NegativeInfinity;
                double x = observations[t + 1];

                for (int i = 0; i < N; ++i)
                {
                    for (int j = 0; j < N; ++j)
                    {
                        logKsi[t][i, j] = lnfwd[t, i] + logA[i, j] + lnbwd[t + 1, j]
                            + MathHelper.LogProbabilityFunction(probB[j], x) + w;
                        lnsum = LogHelper.LogSum(lnsum, logKsi[t][i, j]);
                    }
                }

                if (lnsum != double.NegativeInfinity)
                {
                    for (int i = 0; i < N; ++i)
                    {
                        for (int j = 0; j < N; ++j)
                        {
                            logKsi[t][i, j] = logKsi[t][i, j] - lnsum;
                        }
                    }
                }
            }

            // The log-likelihood of this sequence is the log-sum of the
            // forward variables at the last time step.
            double sequenceLogLikelihood = Double.NegativeInfinity;
            for (int i = 0; i < N; ++i)
            {
                sequenceLogLikelihood = LogHelper.LogSum(sequenceLogLikelihood, lnfwd[T - 1, i]);
            }
            newLogLikelihood += sequenceLogLikelihood;
        }

        newLogLikelihood /= K; // average log-likelihood per sequence

        deltaLogLikelihood = newLogLikelihood - oldLogLikelihood;
        iteration++;

        if (ShouldTerminate(deltaLogLikelihood, iteration))
        {
            should_continue = false;
        }
        else
        {
            // Update pi
            for (int i = 0; i < N; ++i)
            {
                double lnsum = double.NegativeInfinity;
                for (int k = 0; k < K; ++k)
                {
                    lnsum = LogHelper.LogSum(lnsum, mLogGamma[k][0, i]);
                }
                logPi[i] = lnsum - lnK;
            }

            // Update A
            for (int i = 0; i < N; ++i)
            {
                for (int j = 0; j < N; ++j)
                {
                    double lndenom = double.NegativeInfinity;
                    double lnnum = double.NegativeInfinity;
                    for (int k = 0; k < K; ++k)
                    {
                        int T = observations_db[k].Length;
                        for (int t = 0; t < T - 1; ++t)
                        {
                            lnnum = LogHelper.LogSum(lnnum, mLogKsi[k][t][i, j]);
                            lndenom = LogHelper.LogSum(lndenom, mLogGamma[k][t, i]);
                        }
                    }
                    logA[i, j] = (lnnum == lndenom) ? 0 : lnnum - lndenom;
                }
            }

            // Update B: re-fit each state's emission density by weighted
            // maximum likelihood over the concatenated observations, using
            // the normalized, exponentiated gammas as weights.
            for (int i = 0; i < N; ++i)
            {
                double lnsum = double.NegativeInfinity;
                for (int k = 0, w = 0; k < K; ++k)
                {
                    double[] observations = observations_db[k];
                    int T = observations.Length;
                    for (int t = 0; t < T; ++t, ++w)
                    {
                        Bweights[w] = mLogGamma[k][t, i];
                        lnsum = LogHelper.LogSum(lnsum, Bweights[w]);
                    }
                }

                if (lnsum != double.NegativeInfinity)
                {
                    for (int w = 0; w < Bweights.Length; ++w)
                    {
                        Bweights[w] = Bweights[w] - lnsum;
                    }
                }

                for (int w = 0; w < Bweights.Length; ++w)
                {
                    Bweights[w] = System.Math.Exp(Bweights[w]);
                }

                probB[i].Process(observations_db_1d, Bweights);
            }
        }
    } while (should_continue);

    return newLogLikelihood;
}
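In the continuous case the B update is no longer a per-symbol count ratio. Assuming `probB[i].Process(data, weights)` performs a weighted re-fit of the emission density (the usual reading of this step), the update solves a weighted maximum-likelihood problem over all observations:

$$
\hat{b}_i \;=\; \arg\max_{b} \sum_{k}\sum_{t=1}^{T_k} \tilde{\gamma}^{(k)}_t(i)\, \log b\!\left(o^{(k)}_t\right),
\qquad
\tilde{\gamma}^{(k)}_t(i) = \frac{\gamma^{(k)}_t(i)}{\sum_{k'}\sum_{t'} \gamma^{(k')}_{t'}(i)},
$$

with the normalized weights $\tilde{\gamma}$ computed in the log domain and exponentiated just before the fit, exactly as `Bweights` is prepared above.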
private void InnerGradient(FactorPotential<T> factor, T[][] inputs, int[] outputs,
    double[] lnZx, double[] lnZxy, double[] gradient)
{
    int factorIndex = factor.Index;

    // Compute all forward and backward matrices to be
    // used in the feature functions marginal computations.
    double[][,] lnFwds = new double[inputs.Length][,];
    double[][,] lnBwds = new double[inputs.Length][,];
    for (int i = 0; i < inputs.Length; i++)
    {
        lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
        lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
    }

    double[] marginals = new double[function.Outputs];

    // For each feature in the factor potential function
    int end = factor.FactorParameters.Offset + factor.FactorParameters.Count;
    for (int k = factor.FactorParameters.Offset; k < end; k++)
    {
        IFeature<T> feature = function.Features[k];
        double parameter = function.Weights[k];

        if (Double.IsInfinity(parameter))
        {
            gradient[k] = 0;
            continue;
        }

        // Compute the two marginal sums for the gradient calculation
        // as given in eq. 1.52 of Sutton, McCallum; "An Introduction to
        // Conditional Random Fields for Relational Learning". The sums
        // will be computed in the log domain for numerical stability.

        double lnsum1 = Double.NegativeInfinity;
        double lnsum2 = Double.NegativeInfinity;

        // For each training sample (sequence)
        for (int i = 0; i < inputs.Length; i++)
        {
            T[] x = inputs[i];  // training input
            int y = outputs[i]; // training output

            // Compute marginals for all possible outputs
            for (int j = 0; j < marginals.Length; j++)
            {
                marginals[j] = Double.NegativeInfinity;
            }

            // However, making the assumption that each factor is responsible for only
            // one output label, we can compute the marginal only for the current factor
            marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

            // The first term contains a marginal probability p(w|x,y), which is
            // exactly a marginal distribution of the clamped CRF (eq. 1.46).
            lnsum1 = Special.LogSum(lnsum1, (marginals[y] == lnZxy[i]) ? 0 : marginals[y] - lnZxy[i]);

            // The second term contains a different marginal p(w,y|x), which is the
            // same marginal probability required in a fully-observed CRF.
            for (int j = 0; j < marginals.Length; j++)
            {
                lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);
            }

            Accord.Diagnostics.Debug.Assert(!marginals.HasNaN());
            Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum1));
            Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum2));
        }

        // Compute the current derivative
        double sum1 = Math.Exp(lnsum1);
        double sum2 = Math.Exp(lnsum2);
        double derivative = sum1 - sum2;

        if (sum1 == sum2)
        {
            derivative = 0;
        }

        Accord.Diagnostics.Debug.Assert(!Double.IsNaN(derivative));

        // Include the regularization derivative if required
        if (sigma != 0)
        {
            derivative -= parameter / sigma;
        }

        gradient[k] = -derivative;
    }
}