示例#1
0
        /// <summary>
        /// HSMM (hidden semi-Markov model) Viterbi implementation based on:
        /// Guedon, Y. (2003), Estimating hidden semi-Markov chains from discrete sequences, Journal of
        /// Computational and Graphical Statistics, Volume 12, Number 3, page 604-639 - 2003
        ///
        /// Fills a per-state / per-position score table (<c>alpha</c>) together with the best segment
        /// duration and best predecessor state for every cell, back-tracks through those tables, and
        /// finally passes the resulting state list through <c>OutlierMask</c>, <c>SmallSegmentsMask</c>
        /// and <c>OversegmentationMask</c>.
        /// </summary>
        /// <param name="x">Observation sequence; <c>x[t]</c> is handed to
        /// <c>_emission.EstimateViterbiLikelihood</c> for position <c>t</c>. Its <c>Count</c> defines the sequence length.</param>
        /// <param name="haploidMeans">Passed unchanged to <c>_emission.EstimateViterbiLikelihood</c>.</param>
        /// <returns>A list with one state index per observation (<c>x.Count</c> entries). Entries not reached by
        /// the back-tracking loop keep the initial value 2.</returns>
        public List <int> BestHsmmPathViterbi(List <List <double> > x, List <double> haploidMeans)
        {
            // Initialization
            // alpha is nStates x (length + 1); the two back-pointer tables are nStates x length.
            var length            = x.Count;
            var alpha             = CanvasCommon.Utilities.MatrixCreate(nStates, length + 1);
            var bestStateDuration = new int[nStates][];
            var bestStateIndex    = new int[nStates][];

            for (int i = 0; i < nStates; ++i)
            {
                bestStateIndex[i]    = new int[length];
                bestStateDuration[i] = new int[length];
            }
            // NOTE(review): column 0 is seeded with the raw state probability, while every later term
            // added into alpha is a Math.Log(...) value -- confirm whether a log was intended here.
            for (int j = 0; j < nStates; j++)
            {
                alpha[j][0] = this._stateProbabilities[j];
            }

            // Hard-coded duration model: segments longer than maxStateLength are not considered, and
            // sojournMeans is indexed by state below, so it must have at least nStates entries
            // (five are provided -- presumably nStates <= 5; verify).
            var maxStateLength = 90;
            var sojournMeans   = new List <int> {
                10, 10, 80, 50, 50
            };
            // stateDurationProbability[j][d] is added directly to log-scores below, so it is presumably
            // already in log space; sojournLastState[j][d] is wrapped in Math.Log before use.
            // TODO confirm against GetStateDurationProbability / CalculateSojourn.
            var stateDurationProbability = GetStateDurationProbability(sojournMeans, maxStateLength);
            var sojournLastState         = CalculateSojourn(maxStateLength, sojournMeans);

            double emissionSequence     = 0;
            double tempEmissionSequence = 0;
            var    bestState            = 0;
            var    firstState           = true;
            var    firstI     = true;
            // All-ones vector handed to EstimateViterbiLikelihood in place of a transition row --
            // presumably so that only the emission term contributes (log 1 = 0); verify.
            var    transition = Enumerable.Repeat(1.0, nStates).ToArray();


            // Induction over positions 1 .. length - 2; the last position is handled separately below.
            for (int t = 1; t < length - 1; t++)
            {
                for (int j = 0; j < nStates; j++)
                {
                    emissionSequence = 0;
                    firstState       = true;

                    // Candidate durations of the segment in state j ending at t.
                    // NOTE(review): the step is 2, so only odd durations (1, 3, 5, ...) are evaluated and
                    // emissionSequence accumulates x[t-1], x[t-3], ... only -- confirm this is intentional.
                    for (int stateDuration = 1; stateDuration < Math.Min(maxStateLength, t); stateDuration += 2)
                    {
                        // Best predecessor state i != j for a segment starting right after t - stateDuration.
                        // firstI forces acceptance of the first candidate so the stale value left in
                        // tempEmissionSequence from a previous iteration is never used as the baseline.
                        firstI = true;
                        for (int i = 0; i < nStates; i++)
                        {
                            if (i == j)
                            {
                                continue;
                            }
                            if (Math.Log(_transition[i][j]) + alpha[i][t - stateDuration] > tempEmissionSequence || firstI)
                            {
                                tempEmissionSequence = Math.Log(_transition[i][j]) + alpha[i][t - stateDuration];
                                bestState            = i;
                                firstI = false;
                            }
                        }
                        // Keep the duration whose combined score (accumulated emissions + duration term +
                        // best predecessor score) is highest; firstState accepts the first candidate.
                        if (firstState || emissionSequence + stateDurationProbability[j][stateDuration] + tempEmissionSequence > alpha[j][t])
                        {
                            alpha[j][t]             = emissionSequence + stateDurationProbability[j][stateDuration] + tempEmissionSequence;
                            bestStateDuration[j][t] = stateDuration;
                            bestStateIndex[j][t]    = bestState;
                            firstState = false;
                        }
                        // Extend the accumulated emission score after the comparison, so the current
                        // iteration's observation only contributes to later (longer) durations.
                        emissionSequence += _emission.EstimateViterbiLikelihood(x[t - stateDuration], j, haploidMeans, transition);
                    }

                    // While t is still within one maximal segment, also consider state j having been
                    // occupied since the start of the sequence. The -1 markers record "no predecessor"
                    // and are what terminates the back-tracking loop.
                    if (t + 1 <= maxStateLength)
                    {
                        if (firstState || emissionSequence + Math.Log(Poisson.PMF(sojournMeans[j], t + 1) * _stateProbabilities[j]) > alpha[j][t])
                        {
                            alpha[j][t]             = emissionSequence + Math.Log(Poisson.PMF(sojournMeans[j], t + 1) * _stateProbabilities[j]);
                            bestStateDuration[j][t] = -1;
                            bestStateIndex[j][t]    = -1;
                        }
                    }
                    // The observation at t itself is added last, whichever candidate won.
                    alpha[j][t] += _emission.EstimateViterbiLikelihood(x[t], j, haploidMeans, transition);
                }
            }


            // Termination: same recurrence for the final position (length - 1), but with a step of 1 and
            // with sojournLastState in place of stateDurationProbability.
            // NOTE(review): length - 1 - stateDuration becomes negative once stateDuration >= length,
            // which happens when length < maxStateLength - 1 and would throw on the alpha / x indexers --
            // confirm callers always supply sufficiently long sequences.
            for (int j = 0; j < nStates; j++)
            {
                emissionSequence = 0;
                firstState       = true;
                for (int stateDuration = 1; stateDuration < maxStateLength - 1; stateDuration++)
                {
                    firstI = true;
                    for (int i = 0; i < nStates; i++)
                    {
                        if (i == j)
                        {
                            continue;
                        }
                        if (Math.Log(_transition[i][j]) + alpha[i][length - 1 - stateDuration] > tempEmissionSequence || firstI)
                        {
                            tempEmissionSequence = Math.Log(_transition[i][j]) + alpha[i][length - 1 - stateDuration];
                            bestState            = i;
                            firstI = false;
                        }
                    }

                    // Math.Min(stateDuration, maxStateLength) always equals stateDuration here because of
                    // the loop bound above.
                    if (emissionSequence + Math.Log(sojournLastState[j][Math.Min(stateDuration, maxStateLength)]) + tempEmissionSequence > alpha[j][length - 1] || firstState)
                    {
                        alpha[j][length - 1]             = emissionSequence + Math.Log(sojournLastState[j][Math.Min(stateDuration, maxStateLength)]) + tempEmissionSequence;
                        bestStateDuration[j][length - 1] = stateDuration;
                        bestStateIndex[j][length - 1]    = bestState;
                        firstState = false;
                    }
                    emissionSequence += _emission.EstimateViterbiLikelihood(x[length - 1 - stateDuration], j, haploidMeans, transition);
                }

                // "State j from the very beginning" candidate for the final position.
                // NOTE(review): the comparison indexes sojournLastState with Math.Min(length - 1, ...)
                // but the assignment uses Math.Min(length, ...); the two differ when
                // length <= maxStateLength -- confirm which index is intended.
                if (emissionSequence + Math.Log(sojournLastState[j][Math.Min(length - 1, maxStateLength)] * _stateProbabilities[j]) > alpha[j][length - 1] || firstState)
                {
                    alpha[j][length - 1]             = emissionSequence + Math.Log(sojournLastState[j][Math.Min(length, maxStateLength)] * _stateProbabilities[j]);
                    bestStateDuration[j][length - 1] = -1;
                    bestStateIndex[j][length - 1]    = -1;
                }
                alpha[j][length - 1] += _emission.EstimateViterbiLikelihood(x[length - 1], j, haploidMeans, transition);
            }

            // Backtracking
            // Every position starts out as state 2; positions before the point where the loop stops
            // (a -1 predecessor marker) are never overwritten and keep that value.
            List <int> finalStates = Enumerable.Repeat(2, length).ToList();

            int T = length - 1;

            // NOTE(review): bestState is not chosen as the arg-max of alpha[j][length - 1]; it still
            // holds whatever the last predecessor search above assigned -- confirm this is intended.
            while (bestStateIndex[bestState][T] >= 0)
            {
                // Label the whole segment ending at T, then jump to the predecessor state at the
                // position just before that segment.
                for (int i = T; i >= T - bestStateDuration[bestState][T] + 1; i--)
                {
                    finalStates[i] = bestState;
                }
                var alternativeBestState = bestState;
                bestState = bestStateIndex[bestState][T];

                T -= bestStateDuration[alternativeBestState][T];
            }
            // NOTE(review): the list was filled by absolute position above, so this Reverse flips the
            // order of the labels relative to x -- confirm this is intended.
            finalStates.Reverse();
            // Post-processing filters (defined elsewhere) applied to the decoded path.
            OutlierMask(finalStates);
            SmallSegmentsMask(finalStates);
            OversegmentationMask(finalStates);
            return(finalStates);
        }
示例#2
0
        /// <summary>
        /// Standard Viterbi algorithm for finding the best path through the sequence
        /// see Rabiner, Lawrence R. "A tutorial on hidden Markov models and selected applications in speech recognition."
        /// Proceedings of the IEEE 77.2 (1989): 257-286.
        /// </summary>
        /// <param name="depthList">Observation sequence; element <c>t</c> is scored with
        /// <c>_emission.EstimateViterbiLikelihood</c>. An empty list yields an empty path.</param>
        /// <param name="start">Not read by this method; kept so existing callers keep compiling.</param>
        /// <param name="haploidMeans">Passed unchanged to <c>_emission.EstimateViterbiLikelihood</c>.</param>
        /// <returns>One state index per observation, in sequence order.</returns>
        public List <int> BestPathViterbi(List <List <double> > depthList, uint[] start, List <double> haploidMeans)
        {
            var observations = depthList;
            var size         = observations.Count;

            // Nothing to decode: previously this fell through to observations[0] / bestScore[size - 1]
            // and threw an out-of-range exception.
            if (size == 0)
            {
                return new List <int>();
            }

            // bestScore[t][j]: score of the best path ending in state j at position t.
            // bestStateSequence[t][j]: predecessor state on that path (-1 at t == 0).
            double[][] bestScore         = CanvasCommon.Utilities.MatrixCreate(size, nStates);
            int[][]    bestStateSequence = new int[size][];
            for (int t = 0; t < size; ++t)
            {
                bestStateSequence[t] = new int[nStates];
            }

            // Initialization
            for (int j = 0; j < nStates; j++)
            {
                // This should be the score for emitting the first data element, combining the initial state prob with the emission prob.
                // The right way to make the change here is to refactor such that we can get the emission probability separate from the transition probability,
                // but for now just hack: subtract off the transition probability
                bestScore[0][j]         = Math.Log(this._stateProbabilities[j]) + _emission.EstimateViterbiLikelihood(observations[0], j, haploidMeans, _transition[0]) - Math.Log(_transition[0][j]);
                bestStateSequence[0][j] = -1;
            }

            // Induction
            for (int t = 1; t < size; t++)
            {
                for (int j = 0; j < nStates; j++)
                {
                    // Defaults to predecessor 0 with a Double.MinValue score when no candidate compares
                    // greater (e.g. every candidate is -Infinity or NaN).
                    int    bestPrevious = 0;
                    double bestValue    = Double.MinValue;
                    for (int i = 0; i < nStates; i++)
                    {
                        // EstimateViterbiLikelihood receives the transition row of predecessor i, so the
                        // returned value already accounts for the i -> j transition (see the note in the
                        // initialization step).
                        var candidate = bestScore[t - 1][i] + _emission.EstimateViterbiLikelihood(observations[t], j, haploidMeans, _transition[i]);
                        if (candidate > bestValue)
                        {
                            bestPrevious = i;
                            bestValue    = candidate;
                        }
                    }
                    bestScore[t][j]         = bestValue;
                    bestStateSequence[t][j] = bestPrevious;
                }
            }

            // Termination: pick the best-scoring state at the last position (lowest index wins ties).
            int bestState      = -1;
            var bestFinalScore = Double.MinValue;
            for (int j = 0; j < nStates; j++)
            {
                if (bestScore[size - 1][j] > bestFinalScore)
                {
                    bestState      = j;
                    bestFinalScore = bestScore[size - 1][j];
                }
            }

            // If no final score exceeded the sentinel (all scores are Double.MinValue, -Infinity or NaN),
            // the original left bestState at -1 and then used it as an array index while backtracking.
            // Fall back to state 0, which is the same default the induction step uses.
            if (bestState < 0)
            {
                bestState = 0;
            }

            // Backtracking: walk the predecessor table from the end, then flip into sequence order.
            var bestStates = new List <int>(size);
            for (int t = size - 1; t > 0; t--)
            {
                bestStates.Add(bestState);
                bestState = bestStateSequence[t][bestState];
            }
            bestStates.Add(bestState);

            bestStates.Reverse();
            return(bestStates);
        }