/// <summary>
        /// Builds a genotype flagged as <c>GenotypeCallResult.GenotypeCalled</c> from a set of
        /// base frequencies, and records the index of the largest frequency in <c>indexMax</c>.
        /// </summary>
        /// <param name="frequencies">Frequencies to store; its <c>Frequencies</c> array is scanned for the maximum.</param>
        /// <param name="originalReadCounts">Per-base read counts, stored as-is in <c>OriginalBasePairCounts</c>.</param>
        /// <param name="pileup">Optional pile-up forwarded to the chained constructor.</param>
        public ContinuousFrequencySNPGenotype(BasePairFrequencies frequencies, int[] originalReadCounts, PileUp pileup = null)
            : this(GenotypeCallResult.GenotypeCalled, pileup)
        {
            Frequencies            = frequencies;
            OriginalBasePairCounts = originalReadCounts;

            var values = frequencies.Frequencies;

            // Seed the scan with the first entry; on ties the earliest index is kept
            // because only a strictly larger value replaces the current winner.
            var largest = values[0];
            int winner  = 0;

            for (int k = 1; k < values.Length; k++)
            {
                if (!(values[k] > largest))
                {
                    continue;
                }
                largest = values[k];
                winner  = k;
            }

            this.indexMax = winner;
        }
예제 #2
0
        /// <summary>
        /// Calculates the conditional probability that a single observed base comes from each of
        /// A, C, G, T, based on the observed read, its quality and the current frequency estimates.
        /// The results are written into <paramref name="data"/> and normalised by their sum.
        /// </summary>
        /// <remarks>
        /// If every term is zero the sum is zero, so the normalised entries become NaN and the
        /// returned value is negative infinity; no special handling is applied here.
        /// </remarks>
        /// <returns>The natural logarithm of the summed (pre-normalisation) probabilities for this base.</returns>
        /// <param name="freqs">Current frequency estimate for each base.</param>
        /// <param name="data">Output buffer with one slot per base; every slot is overwritten.</param>
        /// <param name="bp">The observed base together with its Phred score.</param>
        private static double updateConditionalProbabilities(BasePairFrequencies freqs, double[] data, BaseAndQuality bp)
        {
            // Probability that the call is correct, and the share of the error
            // probability given to each of the three alternative bases.
            double probRight = BaseQualityUtils.GetCorrectProbability(bp.PhredScore);
            double probWrong = BaseQualityUtils.GetErrorProbability(bp.PhredScore) / 3.0;
            double totProb   = 0.0;

            for (int i = 0; i < data.Length; i++)
            {
                var freq = freqs.Frequencies[i];

                // A base with zero frequency contributes nothing. Write the zero explicitly so a
                // value left in the buffer by an earlier call is not rescaled below.
                double prob = 0.0;
                if (freq != 0.0)
                {
                    prob = freq * (i == bp.Base ? probRight : probWrong);
                }
                totProb += prob;
                data[i]  = prob;
            }

            // Normalise so the entries sum to one.
            for (int i = 0; i < data.Length; i++)
            {
                data[i] /= totProb;
            }
            return Math.Log(totProb);
        }
예제 #3
0
        /// <summary>
        /// Produces a continuous-frequency genotype for one pile-up. Reads that are N, gaps, or
        /// have a Phred score of 17 or less are discarded; the remaining reads give the initial
        /// per-base frequencies, which are optionally refined by an iterative update that stops
        /// once the summed log value returned by <c>updateConditionalProbabilities</c> improves
        /// by no more than 1e-3.
        /// </summary>
        /// <param name="pu">The pile-up to genotype.</param>
        /// <returns>
        /// A <c>TooManyGaps</c> result when the indel check trips, a <c>NoData</c> result when no
        /// read survives filtering, otherwise a called genotype with frequencies and raw counts.
        /// </returns>
        private static ContinuousFrequencySNPGenotype callGenotype(PileUp pu)
        {
            // Positions that look like a deletion are reported and not genotyped.
            if (pileupHasTooManyIndels(pu))
            {
                return new ContinuousFrequencySNPGenotype(GenotypeCallResult.TooManyGaps, pu);
            }

            // Drop ambiguous bases, gaps and low scoring reads.
            var usableReads = pu.Bases
                                .Where(read => read.Base != BaseAndQuality.N_BASE_INDEX)
                                .Where(read => read.Base != BaseAndQuality.GAP_BASE_INDEX)
                                .Where(read => read.PhredScore > 17)
                                .ToArray();

            int readCount = usableReads.Length;
            if (readCount == 0)
            {
                return new ContinuousFrequencySNPGenotype(GenotypeCallResult.NoData);
            }

            // Tally how many retained reads show each base.
            var countsPerBase = new int[BasePairFrequencies.NUM_BASES];
            for (int r = 0; r < readCount; r++)
            {
                countsPerBase[usableReads[r].Base] += 1;
            }

            // Starting estimate: observed proportion of each base.
            var startingFrequencies = new double[countsPerBase.Length];
            for (int b = 0; b < countsPerBase.Length; b++)
            {
                startingFrequencies[b] = countsPerBase[b] / (double)readCount;
            }
            var estimate = new BasePairFrequencies(startingFrequencies);

            // Refinement only runs when more than one base was seen and EM is switched on.
            bool severalBasesSeen = countsPerBase.Count(c => c > 0) > 1;
            if (!(severalBasesSeen && DO_EM_ESTIMATION))
            {
                return new ContinuousFrequencySNPGenotype(estimate, countsPerBase, pu);
            }

            // One row of per-base conditional probabilities for every retained read.
            var posteriors = new double[readCount][];
            for (int r = 0; r < readCount; r++)
            {
                posteriors[r] = new double[BasePairFrequencies.NUM_BASES];
            }

            double previousLogLik = double.MinValue;
            double improvement;
            do
            {
                // Recompute each read's conditional probabilities and accumulate the log values.
                double logLik = 0;
                for (int r = 0; r < posteriors.Length; r++)
                {
                    logLik += updateConditionalProbabilities(estimate, posteriors[r], usableReads[r]);
                }
                improvement    = logLik - previousLogLik;
                previousLogLik = logLik;

                // New frequency for each base = mean of that base's conditional probability over reads.
                Array.Clear(estimate.Frequencies, 0, BasePairFrequencies.NUM_BASES);
                foreach (var row in posteriors)
                {
                    for (int b = 0; b < row.Length; b++)
                    {
                        estimate.Frequencies[b] += row[b];
                    }
                }
                for (int b = 0; b < estimate.Frequencies.Length; b++)
                {
                    estimate.Frequencies[b] /= (double)posteriors.Length;
                }
            }
            while (improvement > 1e-3);

            return new ContinuousFrequencySNPGenotype(estimate, countsPerBase, pu);
        }