/// <summary>
///   Runs the Baum-Welch re-estimation loop over the given observation sequences,
///   updating the model's initial-state and transition log-probabilities in place
///   and delegating the emission update to <c>UpdateEmissions</c>.
/// </summary>
/// <remarks>
///   The Baum-Welch algorithm is a particular case of a generalized
///   expectation-maximization (GEM) algorithm. It can compute maximum likelihood
///   estimates and posterior mode estimates for the parameters (transition and
///   emission probabilities) of an HMM, when given only emissions as training data.
///   Each iteration has two steps:
///   (1) calculating the forward and backward probabilities for each HMM state;
///   (2) from those, determining the expected count of each transition-emission pair
///   and dividing it by the probability of the entire string, which then becomes the
///   new value of the transition.
/// </remarks>
/// <param name="observations">
///   The training sequences, one array per sequence. The code reads <c>Length</c> of
///   every element and indexes time step 0 and <c>Length - 1</c>, so an empty outer
///   array or an empty sequence is not supported.
/// </param>
/// <returns>
///   The average log-likelihood computed in the last iteration that was executed,
///   regardless of whether the loop ended because <c>convergence.HasConverged</c>
///   became true or because the iteration cap was reached.
/// </returns>
private double run(Array[] observations)
{
    // Upper bound on the number of passes performed by this method, independent of
    // whatever criterion the convergence object applies.
    const int MaxIterations = 30;

    // Grab model information
    int states = model.States;
    var logA = model.Transitions;
    var logP = model.Probabilities;

    // Initialize the algorithm
    int N = observations.Length;
    double logN = Math.Log(N);

    LogKsi = new double[N][][,];
    LogGamma = new double[N][,];

    for (int i = 0; i < observations.Length; i++)
    {
        int T = observations[i].Length;

        LogKsi[i] = new double[T][,];
        LogGamma[i] = new double[T, states];

        for (int t = 0; t < LogKsi[i].Length; t++)
        {
            LogKsi[i][t] = new double[states, states];
        }
    }

    bool stop = false;

    // Work buffers are sized for the longest sequence and reused for every sequence.
    int TMax = observations.Max(x => x.Length);
    double[,] lnFwd = new double[TMax, states];
    double[,] lnBwd = new double[TMax, states];

    // Initialize the model log-likelihoods
    double newLogLikelihood = Double.NegativeInfinity;
    convergence.NewValue = Double.NegativeInfinity;

    int itersLeft = MaxIterations;

    do // Until convergence or max iterations is reached
    {
        itersLeft--;

        // Start every pass from an empty accumulator. Resetting here (rather than
        // after the convergence check) keeps the value of the final pass intact, so
        // the method does not return -Infinity when the iteration cap is what ends
        // the loop.
        newLogLikelihood = Double.NegativeInfinity;

        // For each sequence in the observations input
        for (int i = 0; i < observations.Length; i++)
        {
            int T = observations[i].Length;
            double[,] logGamma = LogGamma[i];
            double w = LogWeights[i];

            // 1st step - Calculating the forward probability and the
            //            backward probability for each HMM state.
            ComputeForwardBackward(i, lnFwd, lnBwd);

            // 2nd step - Determining the frequency of the transition-emission pair values
            //            and dividing it by the probability of the entire string.

            // Calculate gamma values for next computations
            for (int t = 0; t < T; t++)
            {
                double lnsum = Double.NegativeInfinity;

                for (int k = 0; k < states; k++)
                {
                    logGamma[t, k] = lnFwd[t, k] + lnBwd[t, k] + w;
                    lnsum = Special.LogSum(lnsum, logGamma[t, k]);
                }

                // Normalize if different from zero (a log-sum of -Infinity means the
                // total mass at this time step is zero, so there is nothing to scale).
                if (lnsum != Double.NegativeInfinity)
                {
                    for (int k = 0; k < states; k++)
                    {
                        logGamma[t, k] = logGamma[t, k] - lnsum;
                    }
                }
            }

            // Calculate ksi values for next computations
            ComputeKsi(i, lnFwd, lnBwd);

            // Compute log-likelihood for the given sequence by folding the forward
            // values of the last time step into the running accumulator.
            for (int j = 0; j < states; j++)
            {
                newLogLikelihood = Special.LogSum(newLogLikelihood, lnFwd[T - 1, j]);
            }
        }

        // Average the likelihood for all sequences
        newLogLikelihood /= observations.Length;
        convergence.NewValue = newLogLikelihood;

        // Check for convergence
        if (convergence.HasConverged)
        {
            // The model has converged.
            stop = true;
        }
        else
        {
            // We haven't converged yet
            // 3. Continue with parameter re-estimation

            // 3.1 Re-estimation of initial state probabilities
            for (int i = 0; i < logP.Length; i++)
            {
                double lnsum = Double.NegativeInfinity;

                for (int k = 0; k < LogGamma.Length; k++)
                {
                    lnsum = Special.LogSum(lnsum, LogGamma[k][0, i]);
                }

                logP[i] = lnsum - logN;
            }

            // 3.2 Re-estimation of transition probabilities
            for (int i = 0; i < states; i++)
            {
                for (int j = 0; j < states; j++)
                {
                    double lnnum = Double.NegativeInfinity;
                    double lnden = Double.NegativeInfinity;

                    for (int k = 0; k < LogGamma.Length; k++)
                    {
                        int T = observations[k].Length;

                        for (int t = 0; t < T - 1; t++)
                        {
                            lnnum = Special.LogSum(lnnum, LogKsi[k][t][i, j]);
                            lnden = Special.LogSum(lnden, LogGamma[k][t, i]);
                        }
                    }

                    // Exact comparison is deliberate: when numerator and denominator
                    // are identical (including both being -Infinity, where the
                    // subtraction would produce NaN) the log-ratio is stored as 0.
                    logA[i, j] = (lnnum == lnden) ? 0 : lnnum - lnden;

                    System.Diagnostics.Debug.Assert(!Double.IsNaN(logA[i, j]));
                }
            }

            // 3.3 Re-estimation of emission probabilities
            UpdateEmissions(); // discrete and continuous
        }
    } while (!stop && itersLeft > 0);

    // Returns the model average log-likelihood
    return newLogLikelihood;
}