/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] Backward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output = 0)
{
    int states = function.States;
    int T = observations.Length;

    double[,] bwd = new double[T, states];

    // 1. Initialization
    for (int i = 0; i < states; i++)
        bwd[T - 1, i] = 1.0;

    // 2. Induction
    for (int t = T - 2; t >= 0; t--)
    {
        for (int i = 0; i < states; i++)
        {
            double sum = 0;
            for (int j = 0; j < states; j++)
                sum += bwd[t + 1, j] * Math.Exp(function.Compute(i, j, observations, t + 1, output));
            bwd[t, i] = sum;
        }
    }

    return bwd;
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static void LogBackward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, double[,] lnBwd)
{
    int states = function.States;
    int T = observations.Length;

    // Ensures minimum requirements
    Accord.Diagnostics.Debug.Assert(lnBwd.GetLength(0) >= T);
    Accord.Diagnostics.Debug.Assert(lnBwd.GetLength(1) == states);
    Array.Clear(lnBwd, 0, lnBwd.Length);

    // 1. Initialization
    for (int i = 0; i < states; i++)
        lnBwd[T - 1, i] = 0;

    // 2. Induction
    for (int t = T - 2; t >= 0; t--)
    {
        for (int i = 0; i < states; i++)
        {
            double sum = Double.NegativeInfinity;
            for (int j = 0; j < states; j++)
                sum = Special.LogSum(sum, lnBwd[t + 1, j] + function.Compute(i, j, observations, t + 1, output));
            lnBwd[t, i] = sum;
        }
    }
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static void Backward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, double[] scaling, double[,] bwd)
{
    int states = function.States;
    int T = observations.Length;

    // Ensures minimum requirements
    Accord.Diagnostics.Debug.Assert(bwd.GetLength(0) >= T);
    Accord.Diagnostics.Debug.Assert(bwd.GetLength(1) == states);
    Array.Clear(bwd, 0, bwd.Length);

    // For backward variables, we use the same scale factors
    // for each time t as were used for forward variables.

    // 1. Initialization
    for (int i = 0; i < states; i++)
        bwd[T - 1, i] = 1.0 / scaling[T - 1];

    // 2. Induction
    for (int t = T - 2; t >= 0; t--)
    {
        for (int i = 0; i < states; i++)
        {
            double sum = 0;
            for (int j = 0; j < states; j++)
                sum += bwd[t + 1, j] * Math.Exp(function.Compute(i, j, observations, t + 1, output));
            bwd[t, i] = sum / scaling[t];
        }
    }
}
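A hedged usage sketch, not part of the original file: because the scaled forward and backward passes share the same scaling coefficients, their product can be turned into per-time state posteriors. GammaPosteriors is a hypothetical helper name, the overloads it calls are the convenience versions defined further below in this listing, and the per-time renormalization makes the result independent of the shared scaling convention.

public static double[,] GammaPosteriors<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output)
{
    // Run the scaled forward pass, then reuse its coefficients for the backward pass
    double[] scaling;
    double[,] fwd = Forward(function, observations, output, out scaling);
    double[,] bwd = Backward(function, observations, output, scaling);

    int T = observations.Length;
    int states = function.States;
    double[,] gamma = new double[T, states];

    for (int t = 0; t < T; t++)
    {
        // Normalizing over states at each time step cancels any
        // residual scale factor shared by fwd[t, .] and bwd[t, .]
        double norm = 0;
        for (int i = 0; i < states; i++)
            norm += gamma[t, i] = fwd[t, i] * bwd[t, i];

        if (norm != 0)
            for (int i = 0; i < states; i++)
                gamma[t, i] /= norm;
    }

    return gamma;
}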
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] Forward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output = 0)
{
    int states = function.States;
    int T = observations.Length;

    double[,] fwd = new double[T, states];

    // 1. Initialization
    for (int i = 0; i < states; i++)
        fwd[0, i] = Math.Exp(function.Compute(-1, i, observations, 0, output));

    // 2. Induction
    for (int t = 1; t < T; t++)
    {
        for (int i = 0; i < states; i++)
        {
            double sum = 0.0;
            for (int j = 0; j < states; j++)
                sum += fwd[t - 1, j] * Math.Exp(function.Compute(j, i, observations, t, output));
            fwd[t, i] = sum;
        }
    }

    return fwd;
}
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static void LogForward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, double[,] lnFwd)
{
    int states = function.States;
    int T = observations.Length;

    // Ensures minimum requirements
    System.Diagnostics.Debug.Assert(lnFwd.GetLength(0) >= T);
    System.Diagnostics.Debug.Assert(lnFwd.GetLength(1) == states);
    Array.Clear(lnFwd, 0, lnFwd.Length);

    // 1. Initialization
    for (int i = 0; i < states; i++)
        lnFwd[0, i] = function.Compute(-1, i, observations, 0, output);

    // 2. Induction
    for (int t = 1; t < T; t++)
    {
        for (int i = 0; i < states; i++)
        {
            double sum = Double.NegativeInfinity;
            for (int j = 0; j < states; j++)
                sum = Special.LogSum(sum, lnFwd[t - 1, j] + function.Compute(j, i, observations, t, output));
            lnFwd[t, i] = sum;
        }
    }
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static void LogBackward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, double[,] lnBwd)
{
    int states = function.States;
    int T = observations.Length;

    // Ensures minimum requirements
    System.Diagnostics.Trace.Assert(lnBwd.GetLength(0) >= T);
    System.Diagnostics.Trace.Assert(lnBwd.GetLength(1) == states);
    Array.Clear(lnBwd, 0, lnBwd.Length);

    // No scaling factors are needed here: working in the
    // log domain already prevents numerical underflow.

    // 1. Initialization
    for (int i = 0; i < states; i++)
        lnBwd[T - 1, i] = 0;

    // 2. Induction
    for (int t = T - 2; t >= 0; t--)
    {
        for (int i = 0; i < states; i++)
        {
            double sum = double.NegativeInfinity;
            for (int j = 0; j < states; j++)
                sum = Special.LogSum(sum, lnBwd[t + 1, j] + function.Compute(i, j, observations, t + 1, output));
            lnBwd[t, i] = sum;
        }
    }
}
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static void Forward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, double[] scaling, double[,] fwd)
{
    int states = function.States;
    int T = observations.Length;
    double s = 0;

    // Ensures minimum requirements
    Accord.Diagnostics.Debug.Assert(fwd.GetLength(0) >= T);
    Accord.Diagnostics.Debug.Assert(fwd.GetLength(1) == states);
    Accord.Diagnostics.Debug.Assert(scaling.Length >= T);
    Array.Clear(fwd, 0, fwd.Length);

    // 1. Initialization
    for (int i = 0; i < states; i++)
        s += fwd[0, i] = Math.Exp(function.Compute(-1, i, observations, 0, output));
    scaling[0] = s;

    if (s != 0) // Scaling
    {
        for (int i = 0; i < states; i++)
            fwd[0, i] /= s;
    }

    // 2. Induction
    for (int t = 1; t < T; t++)
    {
        s = 0;
        for (int i = 0; i < states; i++)
        {
            double sum = 0.0;
            for (int j = 0; j < states; j++)
                sum += fwd[t - 1, j] * Math.Exp(function.Compute(j, i, observations, t, output));
            fwd[t, i] = sum;
            s += fwd[t, i]; // scaling coefficient
        }
        scaling[t] = s;

        if (s != 0) // Scaling
        {
            for (int i = 0; i < states; i++)
                fwd[t, i] /= s;
        }
    }
}
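For reference, an addition not present in the original file: the coefficients written to scaling relate to the sequence likelihood in the usual Rabiner fashion. Since each row of fwd is renormalized by $c_t = \texttt{scaling}[t]$,

$$L(x) = \prod_{t=0}^{T-1} c_t, \qquad \ln L(x) = \sum_{t=0}^{T-1} \ln c_t,$$

which is exactly how the Forward overloads further below recover logLikelihood from the coefficients. For a CRF potential, $L(x)$ is an unnormalized score rather than a normalized probability.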
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] Forward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output = 0)
{
    int states = function.States;
    double[,] fwd = new double[observations.Length, states];
    return Forward(function, observations, fwd, output);
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations.
/// </summary>
public static double[,] LogBackward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output)
{
    int states = function.States;
    double[,] lnBwd = new double[observations.Length, states];
    LogBackward(function, observations, output, lnBwd);
    return lnBwd;
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations.
/// </summary>
public static double[,] Backward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, double[] scaling)
{
    int states = function.States;
    double[,] bwd = new double[observations.Length, states];
    Backward(function, observations, output, scaling, bwd);
    return bwd;
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] Backward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output = 0)
{
    int states = function.States;
    int T = observations.Length;
    double[,] bwd = new double[T, states];
    return Backward(function, observations, bwd, output);
}
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] Forward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, out double[] scaling)
{
    int states = function.States;
    double[,] fwd = new double[observations.Length, states];
    scaling = new double[observations.Length];
    Forward(function, observations, output, scaling, fwd);
    return fwd;
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations (no scaling).
/// </summary>
public static double[,] LogBackward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, out double logLikelihood)
{
    int states = function.States;
    var lnBwd = LogBackward(function, observations, output);

    logLikelihood = double.NegativeInfinity;
    for (int i = 0; i < states; i++)
        logLikelihood = Special.LogSum(logLikelihood,
            lnBwd[0, i] + function.Compute(-1, i, observations, 0, output));

    return lnBwd;
}
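A hedged sanity-check sketch, not part of the original file: the log-domain forward and backward recursions in this listing must agree on the sequence log-likelihood, which is a standard test for forward-backward implementations. AssertForwardBackwardAgree and the tolerance are illustrative choices.

public static void AssertForwardBackwardAgree<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output)
{
    double llForward, llBackward;

    // Both overloads are defined in this listing; the returned matrices are discarded
    LogForward(function, observations, output, out llForward);
    LogBackward(function, observations, output, out llBackward);

    System.Diagnostics.Debug.Assert(Math.Abs(llForward - llBackward) < 1e-8);
}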
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] Forward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, out double[] scaling, out double logLikelihood)
{
    int states = function.States;
    double[,] fwd = new double[observations.Length, states];
    scaling = new double[observations.Length];
    Forward(function, observations, output, scaling, fwd);

    logLikelihood = 0;
    for (int i = 0; i < scaling.Length; i++)
        logLikelihood += Math.Log(scaling[i]);

    return fwd;
}
/// <summary>
///   Computes Backward probabilities for a given potential function and a set of observations (no scaling).
/// </summary>
public static double[,] Backward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, out double logLikelihood)
{
    int states = function.States;
    var bwd = Backward(function, observations, output);

    double likelihood = 0;
    for (int i = 0; i < states; i++)
        likelihood += bwd[0, i] * Math.Exp(function.Compute(-1, i, observations, 0, output));

    logLikelihood = Math.Log(likelihood);
    return bwd;
}
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] LogForward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, out double logLikelihood)
{
    int states = function.States;
    double[,] lnFwd = new double[observations.Length, states];
    int T = observations.Length;

    ForwardBackwardAlgorithm.LogForward(function, observations, output, lnFwd);

    logLikelihood = Double.NegativeInfinity;
    for (int j = 0; j < states; j++)
        logLikelihood = Special.LogSum(logLikelihood, lnFwd[T - 1, j]);

    return lnFwd;
}
/// <summary>
///   Computes Forward probabilities for a given potential function and a set of observations.
/// </summary>
///
public static double[,] Forward<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output, out double logLikelihood)
{
    int states = function.States;
    double[,] fwd = new double[observations.Length, states];
    double[] coefficients = new double[observations.Length];

    ForwardBackwardAlgorithm.Forward(function, observations, output, coefficients, fwd);

    logLikelihood = 0;
    for (int i = 0; i < coefficients.Length; i++)
        logLikelihood += Math.Log(coefficients[i]);

    return fwd;
}
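Another illustrative sketch, assuming only the overloads defined above: the scaled recursion and the log-domain recursion compute the same log-likelihood by different means, so one can be cross-checked against the other.

public static void AssertScalingMatchesLogDomain<TObservation>(FactorPotential<TObservation> function,
    TObservation[] observations, int output)
{
    double llScaled, llLogDomain;

    // Sum of log scaling coefficients (scaled overload) versus the
    // log-sum of the last row of the log-domain forward matrix
    Forward(function, observations, output, out llScaled);
    LogForward(function, observations, output, out llLogDomain);

    System.Diagnostics.Debug.Assert(Math.Abs(llScaled - llLogDomain) < 1e-8);
}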
/// <summary>
///   Computes the probability of occurrence of this
///   feature given a sequence of observations.
/// </summary>
///
/// <param name="fwd">The matrix of forward state probabilities.</param>
/// <param name="bwd">The matrix of backward state probabilities.</param>
/// <param name="x">The observation sequence.</param>
/// <param name="y">The output class label for the sequence.</param>
///
/// <returns>The probability of occurrence of this feature.</returns>
///
public override double Marginal(double[,] fwd, double[,] bwd, T[] x, int y)
{
    // Assume the simplifying structure that each
    // factor is responsible for a single output y.
    if (y != OwnerFactorIndex)
        return 0;

    FactorPotential<T> owner = Owner.Factors[OwnerFactorIndex];

    double marginal = 0;
    for (int t = 0; t < x.Length - 1; t++)
        marginal += fwd[t, previous] * bwd[t + 1, current]
            * Math.Exp(owner.Compute(previous, current, x, t + 1, y));

    return marginal;
}
/// <summary>
///   Computes the log-probability of occurrence of this
///   feature given a sequence of observations.
/// </summary>
///
/// <param name="lnFwd">The matrix of forward state log-probabilities.</param>
/// <param name="lnBwd">The matrix of backward state log-probabilities.</param>
/// <param name="x">The observation sequence.</param>
/// <param name="y">The output class label for the sequence.</param>
///
/// <returns>The log-probability of occurrence of this feature.</returns>
///
public override double LogMarginal(double[,] lnFwd, double[,] lnBwd, T[] x, int y)
{
    // Assume the simplifying structure that each
    // factor is responsible for a single output y.
    if (y != OwnerFactorIndex)
        return Double.NegativeInfinity;

    FactorPotential<T> owner = Owner.Factors[OwnerFactorIndex];

    double marginal = double.NegativeInfinity;
    for (int t = 0; t < x.Length - 1; t++)
        marginal = Special.LogSum(marginal, lnFwd[t, previous] + lnBwd[t + 1, current]
            + owner.Compute(previous, current, x, t + 1, y));

    return marginal;
}
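For reference, an addition not present in the original file: both marginal methods above accumulate the standard unnormalized edge marginal of forward-backward inference over the time steps where this transition feature fires. Writing the factor's log-potential as $\Psi$,

$$\Pr(s_t = i,\ s_{t+1} = j \mid x)\ \propto\ \alpha_t(i)\, e^{\Psi(i,\, j,\, x,\, t+1)}\, \beta_{t+1}(j),$$

with normalization by the partition function left to the caller; the log version simply replaces products with sums and the outer sum with Special.LogSum.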
private void InnerGradient(FactorPotential<T> factor, T[][] inputs, int[] outputs,
    double[] lnZx, double[] lnZxy, double[] gradient)
{
    int factorIndex = factor.Index;

    // Compute all forward and backward matrices to be
    // used in the feature functions' marginal computations.
    double[][,] lnFwds = new double[inputs.Length][,];
    double[][,] lnBwds = new double[inputs.Length][,];
    for (int i = 0; i < inputs.Length; i++)
    {
        lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
        lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
    }

    double[] marginals = new double[function.Outputs];

    // For each feature in the factor potential function
    int end = factor.FactorParameters.Offset + factor.FactorParameters.Count;
    for (int k = factor.FactorParameters.Offset; k < end; k++)
    {
        IFeature<T> feature = function.Features[k];
        double parameter = function.Weights[k];

        if (Double.IsInfinity(parameter))
        {
            gradient[k] = 0;
            continue;
        }

        // Compute the two marginal sums for the gradient calculation
        // as given in eq. 1.52 of Sutton, McCallum; "An Introduction to
        // Conditional Random Fields for Relational Learning". The sums
        // will be computed in the log domain for numerical stability.
        double lnsum1 = Double.NegativeInfinity;
        double lnsum2 = Double.NegativeInfinity;

        // For each training sample (sequence)
        for (int i = 0; i < inputs.Length; i++)
        {
            T[] x = inputs[i];  // training input
            int y = outputs[i]; // training output

            // Compute marginals for all possible outputs
            for (int j = 0; j < marginals.Length; j++)
                marginals[j] = Double.NegativeInfinity;

            // However, making the assumption that each factor is responsible for only
            // one output label, we can compute the marginal only for the current factor
            marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

            // The first term contains a marginal probability p(w|x,y), which is
            // exactly a marginal distribution of the clamped CRF (eq. 1.46).
            lnsum1 = Special.LogSum(lnsum1, (marginals[y] == lnZxy[i]) ? 0 : marginals[y] - lnZxy[i]);

            // The second term contains a different marginal p(w,y|x), which is the
            // same marginal probability required in a fully-observed CRF.
            for (int j = 0; j < marginals.Length; j++)
                lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);

            Accord.Diagnostics.Debug.Assert(!marginals.HasNaN());
            Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum1));
            Accord.Diagnostics.Debug.Assert(!Double.IsNaN(lnsum2));
        }

        // Compute the current derivative
        double sum1 = Math.Exp(lnsum1);
        double sum2 = Math.Exp(lnsum2);
        double derivative = sum1 - sum2;
        if (sum1 == sum2)
            derivative = 0;

        Accord.Diagnostics.Debug.Assert(!Double.IsNaN(derivative));

        // Include regularization derivative if required
        if (sigma != 0)
            derivative -= parameter / sigma;

        gradient[k] = -derivative;
    }
}
/// <summary>
///   Computes the gradient using the
///   input/outputs stored in this object.
/// </summary>
///
/// <returns>The value of the gradient vector for the given parameters.</returns>
///
protected double[] Gradient()
{
    // Localize thread locals
    var logLikelihoods = this.logLikelihoods.Value;
    var inputs = this.inputs.Value;
    var outputs = this.outputs.Value;
    var lnZx = this.lnZx.Value;
    var lnZxy = this.lnZxy.Value;
    var gradient = this.gradient.Value;

    double error = 0;

    // The previous call to Objective could have computed
    // the log-likelihoods for all input values. However, if
    // this hasn't been the case, compute them now:
    if (logLikelihoods == null)
        model.LogLikelihood(inputs, outputs, out logLikelihoods);

    // Compute the partition function using the previously
    // computed likelihoods. Also compute the total error.

    // For each x, compute lnZ(x) and lnZ(x,y)
    for (int i = 0; i < inputs.Length; i++)
    {
        double[] lli = logLikelihoods[i];

        // Compute the marginal likelihood
        double sum = Double.NegativeInfinity;
        for (int j = 0; j < lli.Length; j++)
            sum = Special.LogSum(sum, lli[j]);

        lnZx[i] = sum;
        lnZxy[i] = lli[outputs[i]];

        // Compute and return the negative
        // log-likelihood as the error function
        error -= lnZxy[i] - lnZx[i];
    }

    // Now start computing the gradient w.r.t. the
    // feature functions. Each feature function belongs
    // to a factor potential function, so:

    // For each clique potential (factor potential function)
#if DEBUG
    for (int c = 0; c < function.Factors.Length; c++)
#else
    Parallel.For(0, function.Factors.Length, c =>
#endif
    {
        FactorPotential<T> factor = function.Factors[c];

        int factorIndex = factor.Index;

        // Compute all forward and backward matrices to be
        // used in the feature functions' marginal computations.
        var lnFwds = new double[inputs.Length][,];
        var lnBwds = new double[inputs.Length][,];
        for (int i = 0; i < inputs.Length; i++)
        {
            lnFwds[i] = ForwardBackwardAlgorithm.LogForward(factor, inputs[i], factorIndex);
            lnBwds[i] = ForwardBackwardAlgorithm.LogBackward(factor, inputs[i], factorIndex);
        }

        double[] marginals = new double[function.Outputs];

        // For each feature in the factor potential function
        int end = factor.ParameterIndex + factor.ParameterCount;
        for (int k = factor.ParameterIndex; k < end; k++)
        {
            IFeature<T> feature = function.Features[k];
            double parameter = function.Weights[k];

            if (Double.IsInfinity(parameter))
            {
                gradient[k] = 0;
                continue;
            }

            // Compute the two marginal sums for the gradient calculation
            // as given in eq. 1.52 of Sutton, McCallum; "An Introduction to
            // Conditional Random Fields for Relational Learning". The sums
            // will be computed in the log domain for numerical stability.
            double lnsum1 = Double.NegativeInfinity;
            double lnsum2 = Double.NegativeInfinity;

            // For each training sample (sequence)
            for (int i = 0; i < inputs.Length; i++)
            {
                T[] x = inputs[i];  // training input
                int y = outputs[i]; // training output

                // Compute marginals for all possible outputs
                for (int j = 0; j < marginals.Length; j++)
                    marginals[j] = Double.NegativeInfinity;

                // However, making the assumption that each factor is responsible for only
                // one output label, we can compute the marginal only for the current factor
                marginals[factorIndex] = feature.LogMarginal(lnFwds[i], lnBwds[i], x, factorIndex);

                // The first term contains a marginal probability p(w|x,y), which is
                // exactly a marginal distribution of the clamped CRF (eq. 1.46).
                lnsum1 = Special.LogSum(lnsum1, marginals[y] - lnZxy[i]);

                // The second term contains a different marginal p(w,y|x), which is the
                // same marginal probability required in a fully-observed CRF.
                for (int j = 0; j < marginals.Length; j++)
                    lnsum2 = Special.LogSum(lnsum2, marginals[j] - lnZx[i]);

#if DEBUG
                if (Double.IsNaN(lnsum1) || Double.IsNaN(lnsum2))
                    throw new Exception();
#endif
            }

            // Compute the current derivative
            double sum1 = Math.Exp(lnsum1);
            double sum2 = Math.Exp(lnsum2);
            double derivative = sum1 - sum2;
            if (sum1 == sum2)
                derivative = 0;

#if DEBUG
            if (Double.IsNaN(derivative))
                throw new Exception();
#endif

            // Include regularization derivative if required
            if (sigma != 0)
                derivative -= parameter / sigma;

            gradient[k] = -derivative;
        }
    }
#if !DEBUG
    );
#endif

    // Reset log-likelihoods so they are recomputed in the next run,
    // either by the Objective function or by the Gradient calculation.
    this.logLikelihoods.Value = null;

    this.error.Value = error;

    return gradient; // return the gradient
}
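For reference, an addition not present in the original source: the per-weight derivative assembled by both gradient routines above can be written as follows, where $m_k(x_i, y)$ is the log-marginal that feature.LogMarginal computes for sample $i$ and output $y$; this is a paraphrase of eq. 1.52 of Sutton and McCallum, extended with the hidden-variable partition functions $Z(x_i, y_i)$ and $Z(x_i)$ and the optional Gaussian prior:

$$\frac{\partial \ell}{\partial \theta_k} \;=\; \sum_i \left( e^{\,m_k(x_i,\, y_i)\, -\, \ln Z(x_i,\, y_i)} \;-\; \sum_{y} e^{\,m_k(x_i,\, y)\, -\, \ln Z(x_i)} \right) \;-\; \frac{\theta_k}{\sigma},$$

and the code stores $-\partial \ell / \partial \theta_k$ so that the optimizer minimizes the negative log-likelihood.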
private int[] viterbi(FactorPotential<T> factor, T[] observations, out double logLikelihood)
{
    // Viterbi-forward algorithm.
    int states = factor.States;
    int maxState;
    double maxWeight;
    double weight;

    int[,] s = new int[states, observations.Length];
    double[,] lnFwd = new double[states, observations.Length];

    // Base
    for (int i = 0; i < states; i++)
        lnFwd[i, 0] = factor.Compute(-1, i, observations, 0);

    // Induction
    for (int t = 1; t < observations.Length; t++)
    {
        for (int j = 0; j < states; j++)
        {
            maxState = 0;
            maxWeight = lnFwd[0, t - 1] + factor.Compute(0, j, observations, t);

            for (int i = 1; i < states; i++)
            {
                weight = lnFwd[i, t - 1] + factor.Compute(i, j, observations, t);

                if (weight > maxWeight)
                {
                    maxState = i;
                    maxWeight = weight;
                }
            }

            lnFwd[j, t] = maxWeight;
            s[j, t] = maxState;
        }
    }

    // Find the maximum value for time T-1
    maxState = 0;
    maxWeight = lnFwd[0, observations.Length - 1];

    for (int i = 1; i < states; i++)
    {
        if (lnFwd[i, observations.Length - 1] > maxWeight)
        {
            maxState = i;
            maxWeight = lnFwd[i, observations.Length - 1];
        }
    }

    // Traceback
    int[] path = new int[observations.Length];
    path[path.Length - 1] = maxState;

    for (int t = path.Length - 2; t >= 0; t--)
        path[t] = s[path[t + 1], t + 1];

    // Returns the sequence probability as an out parameter
    logLikelihood = maxWeight;

    // Returns the most likely (Viterbi) path for the given sequence
    return path;
}
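For reference, an addition: the recursion above is the standard max-product (Viterbi) analogue of the forward pass, computed in the log domain. With $\Psi$ the factor's log-potential,

$$\delta_0(i) = \Psi(-1,\, i,\, x,\, 0), \qquad \delta_t(j) = \max_i \big[\, \delta_{t-1}(i) + \Psi(i,\, j,\, x,\, t)\, \big],$$

and the matrix s records each argmax so that the optimal state sequence can be read back during the traceback.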