/// <summary>
/// HSMM Viterbi implementation based on:
/// Guedon, Y. (2003), Estimating hidden semi-Markov chains from discrete sequences, Journal of
/// Computational and Graphical Statistics, Volume 12, Number 3, page 604-639 - 2003
/// </summary>
/// <param name="x">Observation sequence; element t is the observation vector handed to the emission model for position t.</param>
/// <param name="haploidMeans">Forwarded unchanged to the emission model.</param>
/// <returns>
/// One state index per position of <paramref name="x"/> after OutlierMask, SmallSegmentsMask and
/// OversegmentationMask have been applied. An empty input yields an empty list.
/// </returns>
public List<int> BestHsmmPathViterbi(List<List<double>> x, List<double> haploidMeans)
{
    var length = x.Count;
    if (length == 0)
    {
        // Nothing to segment; the recursion below needs at least one observation.
        return new List<int>();
    }

    // Initialization
    var alpha = CanvasCommon.Utilities.MatrixCreate(nStates, length + 1);
    var bestStateDuration = new int[nStates][];
    var bestStateIndex = new int[nStates][];
    for (int i = 0; i < nStates; ++i)
    {
        bestStateIndex[i] = new int[length];
        bestStateDuration[i] = new int[length];
    }
    for (int j = 0; j < nStates; j++)
    {
        // NOTE(review): stored as a raw probability while every other alpha entry is a log score.
        // The loops below never read column 0 as a predecessor, so this does not leak into results.
        alpha[j][0] = this._stateProbabilities[j];
    }

    var maxStateLength = 90;
    var sojournMeans = new List<int> { 10, 10, 80, 50, 50 };
    var stateDurationProbability = GetStateDurationProbability(sojournMeans, maxStateLength);
    var sojournLastState = CalculateSojourn(maxStateLength, sojournMeans);

    double emissionSequence = 0;
    double tempEmissionSequence = 0;
    var bestState = 0;
    var firstState = true;
    var firstI = true;
    // Neutral transition weights handed to the emission model; transitions are added explicitly below.
    var transition = Enumerable.Repeat(1.0, nStates).ToArray();
    var lastIndex = length - 1;

    // Induction over every position except the last one.
    for (int t = 1; t < lastIndex; t++)
    {
        for (int j = 0; j < nStates; j++)
        {
            emissionSequence = 0;
            firstState = true;
            // Durations must be visited one by one: emissionSequence gains exactly one
            // observation per iteration, so skipping durations would desynchronise the
            // accumulated emission score from the duration being scored.
            for (int stateDuration = 1; stateDuration < Math.Min(maxStateLength, t); stateDuration++)
            {
                // Best predecessor state i != j ending at position t - stateDuration.
                firstI = true;
                for (int i = 0; i < nStates; i++)
                {
                    if (i == j)
                    {
                        continue;
                    }
                    var candidate = Math.Log(_transition[i][j]) + alpha[i][t - stateDuration];
                    if (firstI || candidate > tempEmissionSequence)
                    {
                        tempEmissionSequence = candidate;
                        bestState = i;
                        firstI = false;
                    }
                }

                var score = emissionSequence + stateDurationProbability[j][stateDuration] + tempEmissionSequence;
                if (firstState || score > alpha[j][t])
                {
                    alpha[j][t] = score;
                    bestStateDuration[j][t] = stateDuration;
                    bestStateIndex[j][t] = bestState;
                    firstState = false;
                }
                emissionSequence += _emission.EstimateViterbiLikelihood(x[t - stateDuration], j, haploidMeans, transition);
            }

            // Alternative: state j has been occupied since the start of the sequence.
            if (t + 1 <= maxStateLength)
            {
                var initialScore = emissionSequence + Math.Log(Poisson.PMF(sojournMeans[j], t + 1) * _stateProbabilities[j]);
                if (firstState || initialScore > alpha[j][t])
                {
                    alpha[j][t] = initialScore;
                    // -1 marks "no predecessor" and terminates backtracking.
                    bestStateDuration[j][t] = -1;
                    bestStateIndex[j][t] = -1;
                }
            }
            alpha[j][t] += _emission.EstimateViterbiLikelihood(x[t], j, haploidMeans, transition);
        }
    }

    // Last position: uses the sojourn values returned by CalculateSojourn for the final state.
    // The duration bound is also limited by the sequence length so that short inputs do not
    // index alpha / x with a negative position (mirrors the bound used in the induction loop).
    var maxFinalDuration = Math.Min(maxStateLength - 1, lastIndex);
    // Index into sojournLastState for a state occupied for the whole sequence; computed once so
    // that the comparison and the assignment below score the same hypothesis.
    var wholeSequenceDuration = Math.Min(length, maxStateLength);
    for (int j = 0; j < nStates; j++)
    {
        emissionSequence = 0;
        firstState = true;
        for (int stateDuration = 1; stateDuration < maxFinalDuration; stateDuration++)
        {
            firstI = true;
            for (int i = 0; i < nStates; i++)
            {
                if (i == j)
                {
                    continue;
                }
                var candidate = Math.Log(_transition[i][j]) + alpha[i][lastIndex - stateDuration];
                if (firstI || candidate > tempEmissionSequence)
                {
                    tempEmissionSequence = candidate;
                    bestState = i;
                    firstI = false;
                }
            }

            var score = emissionSequence
                        + Math.Log(sojournLastState[j][Math.Min(stateDuration, maxStateLength)])
                        + tempEmissionSequence;
            if (firstState || score > alpha[j][lastIndex])
            {
                alpha[j][lastIndex] = score;
                bestStateDuration[j][lastIndex] = stateDuration;
                bestStateIndex[j][lastIndex] = bestState;
                firstState = false;
            }
            emissionSequence += _emission.EstimateViterbiLikelihood(x[lastIndex - stateDuration], j, haploidMeans, transition);
        }

        var wholeSequenceScore = emissionSequence
                                 + Math.Log(sojournLastState[j][wholeSequenceDuration] * _stateProbabilities[j]);
        if (firstState || wholeSequenceScore > alpha[j][lastIndex])
        {
            alpha[j][lastIndex] = wholeSequenceScore;
            bestStateDuration[j][lastIndex] = -1;
            bestStateIndex[j][lastIndex] = -1;
        }
        alpha[j][lastIndex] += _emission.EstimateViterbiLikelihood(x[lastIndex], j, haploidMeans, transition);
    }

    // Termination: start backtracking from the state with the highest final score instead of
    // whatever predecessor index the loops above happened to leave in bestState.
    bestState = 0;
    for (int j = 1; j < nStates; j++)
    {
        if (alpha[j][lastIndex] > alpha[bestState][lastIndex])
        {
            bestState = j;
        }
    }

    // backtracking
    // Positions that are never visited keep the default state 2.
    List<int> finalStates = Enumerable.Repeat(2, length).ToList();
    int T = lastIndex;
    while (bestStateIndex[bestState][T] >= 0)
    {
        var segmentLength = bestStateDuration[bestState][T];
        for (int i = T; i >= T - segmentLength + 1; i--)
        {
            finalStates[i] = bestState;
        }
        bestState = bestStateIndex[bestState][T];
        T -= segmentLength;
    }
    // NOTE(review): the first segment (positions 0..T once the loop stops) is left at the default
    // state rather than being labelled with bestState - confirm this is intended.
    // NOTE(review): finalStates is filled by absolute position above, so this Reverse() returns the
    // path in reversed position order - kept to preserve the existing output; confirm against callers.
    finalStates.Reverse();
    OutlierMask(finalStates);
    SmallSegmentsMask(finalStates);
    OversegmentationMask(finalStates);
    return finalStates;
}
/// <summary>
/// Standard Viterbi algorithm for finding the best path through the sequence
/// see Rabiner, Lawrence R. "A tutorial on hidden Markov models and selected applications in speech recognition."
/// Proceedings of the IEEE 77.2 (1989): 257-286.
/// </summary>
/// <param name="depthList">Observation sequence; element t is the observation vector for position t.</param>
/// <param name="start">Not read by this method; kept so existing callers keep compiling.</param>
/// <param name="haploidMeans">Forwarded unchanged to the emission model.</param>
/// <returns>The highest-scoring state index for every position, in sequence order. An empty input yields an empty list.</returns>
public List<int> BestPathViterbi(List<List<double>> depthList, uint[] start, List<double> haploidMeans)
{
    var x = depthList;
    var size = x.Count;
    if (size == 0)
    {
        // No observations: there is no first element to initialise the recursion with.
        return new List<int>();
    }

    // Initialization
    double[][] bestScore = CanvasCommon.Utilities.MatrixCreate(size, nStates);
    int[][] bestStateSequence = new int[size][];
    for (int i = 0; i < size; ++i)
    {
        bestStateSequence[i] = new int[nStates];
    }

    for (int j = 0; j < nStates; j++)
    {
        // This should be the score for emitting the first data element, combining the initial state prob with the emission prob.
        // The right way to make the change here is to refactor such that we can get the emission probability separate from the transition probability,
        // but for now just hack: subtract off the transition probability
        bestScore[0][j] = Math.Log(this._stateProbabilities[j])
                          + _emission.EstimateViterbiLikelihood(x[0], j, haploidMeans, _transition[0])
                          - Math.Log(_transition[0][j]);
        bestStateSequence[0][j] = -1;
    }

    // Induction
    for (int t = 1; t < size; t++)
    {
        for (int j = 0; j < nStates; j++)
        {
            // NegativeInfinity (not Double.MinValue) is the true lower bound of a log score;
            // state 0 remains the fallback predecessor when no candidate beats it.
            int state = 0;
            double max = Double.NegativeInfinity;
            for (int i = 0; i < nStates; i++)
            {
                // The emission call is made per predecessor because it receives that predecessor's transition row.
                var vitLogL = _emission.EstimateViterbiLikelihood(x[t], j, haploidMeans, _transition[i]);
                var tmpMax = bestScore[t - 1][i] + vitLogL;
                if (tmpMax > max)
                {
                    state = i;
                    max = tmpMax;
                }
            }
            bestScore[t][j] = max;
            bestStateSequence[t][j] = state;
        }
    }

    // Termination: pick the best final state. Defaulting to 0 (instead of -1) keeps the
    // backtracking below from indexing with -1 when every final score is -Infinity or NaN.
    int bestState = 0;
    var max1 = Double.NegativeInfinity;
    for (int i = 0; i < nStates; i++)
    {
        var tmpMax = bestScore[size - 1][i];
        if (tmpMax > max1)
        {
            bestState = i;
            max1 = tmpMax;
        }
    }

    // backtracking: states are collected from the last position to the first, then reversed.
    var bestStates = new List<int>(size);
    var backtrack = size - 1;
    while (backtrack > 0)
    {
        bestStates.Add(bestState);
        bestState = bestStateSequence[backtrack][bestState];
        backtrack--;
    }
    bestStates.Add(bestState);
    bestStates.Reverse();
    return bestStates;
}