Пример #1
0
        /// <summary>
        /// Computes a p-value for having observed <paramref name="observedCallCount"/> calls,
        /// using a Poisson model whose second argument (the expected count) is
        /// <paramref name="coverage"/> multiplied by the error rate obtained from
        /// <paramref name="estimatedBaseCallQuality"/>.
        /// </summary>
        /// <param name="observedCallCount">Number of calls observed.</param>
        /// <param name="coverage">Coverage the calls were observed in.</param>
        /// <param name="estimatedBaseCallQuality">
        /// Quality score handed to <c>MathOperations.QtoP</c>
        /// (presumably a Phred-scaled base-call quality; confirm against that helper).
        /// </param>
        /// <returns>
        /// 1.0 when nothing was observed; otherwise
        /// <c>1 - Poisson.Cdf(observedCallCount - 1, coverage * errorRate)</c>.
        /// </returns>
        public static double AssignPValue(int observedCallCount, int coverage, int estimatedBaseCallQuality)
        {
            // The conversion runs before the zero-count shortcut so the call order is unchanged.
            double perBaseErrorRate = MathOperations.QtoP(estimatedBaseCallQuality);

            if (observedCallCount != 0)
            {
                double expectedNoiseCalls = coverage * perBaseErrorRate;
                return 1 - Poisson.Cdf(observedCallCount - 1.0, expectedNoiseCalls);
            }

            // Nothing observed: report a p-value of one.
            return 1.0;
        }
Пример #2
0
        /// <summary>
        /// Populates the strand-bias statistics from the given support and coverage.
        /// </summary>
        /// <param name="support">Number of observations supporting the variant.</param>
        /// <param name="coverage">
        /// Total number of observations. Used as a divisor without a zero check, so a zero
        /// coverage produces a non-finite <c>Frequency</c>.
        /// </param>
        /// <param name="noiseFreq">Noise frequency; multiplied by coverage for the false-positive Poisson term.</param>
        /// <param name="minDetectableSNP">Minimum detectable variant frequency.</param>
        /// <param name="strandBiasModel">
        /// Model applied when <paramref name="support"/> is zero. Only <c>Poisson</c> and
        /// <c>Extended</c> are handled; any other value leaves the three chance values unassigned here.
        /// </param>
        /// <remarks>
        /// A Type 1 error is rejecting the null hypothesis when we should not have
        /// (we have noise, but called a SNP); it corresponds to <c>ChanceFalsePos</c>, the p-value.
        /// A Type 2 error is failing to reject the null hypothesis when we should have
        /// (we have a variant, but did not call it); it corresponds to <c>ChanceFalseNeg</c>.
        /// </remarks>
        public StrandBiasStats(double support, double coverage, double noiseFreq, double minDetectableSNP,
                               StrandBiasModel strandBiasModel)
        {
            Frequency = support / coverage;
            Support   = support;
            Coverage  = coverage;

            if (support != 0)
            {
                // Chance of fewer than 'support' observations under the noise distribution.
                double chanceBelowSupportUnderNoise = Poisson.Cdf(support - 1, coverage * noiseFreq);

                ChanceVarFreqGreaterThanZero = chanceBelowSupportUnderNoise;     // used in SB metric
                ChanceFalsePos               = 1 - chanceBelowSupportUnderNoise; // used in SB metric

                // Chance of these observations or fewer, given the minimum detectable variant frequency.
                ChanceFalseNeg = Poisson.Cdf(support, coverage * minDetectableSNP);
                return;
            }

            // No supporting observations at all: the outcome depends on the chosen model.
            switch (strandBiasModel)
            {
                case StrandBiasModel.Poisson:
                {
                    ChanceFalsePos = 1;
                    ChanceVarFreqGreaterThanZero = 0;
                    ChanceFalseNeg = 0;
                    break;
                }

                case StrandBiasModel.Extended:
                {
                    // One observation shows the SNP with probability minDetectableSNP and
                    // misses it with probability (1 - minDetectableSNP). Missing it on every
                    // one of 'coverage' observations in a row therefore has probability:
                    double chanceMissedEveryObservation = Math.Pow(1 - minDetectableSNP, coverage);

                    ChanceVarFreqGreaterThanZero = chanceMissedEveryObservation; // used in SB metric

                    // Likelihood that the variant really does not exist
                    // = 1 - chance that it exists but was never seen.
                    ChanceFalsePos = 1 - chanceMissedEveryObservation; // used in SB metric

                    // Chance that a low-frequency variant is present and we did not observe it.
                    ChanceFalseNeg = chanceMissedEveryObservation;
                    break;
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Computes a genotype quality score for <paramref name="allele"/>, bounded to
        /// [<paramref name="minGTQScore"/>, <paramref name="maxGTQScore"/>] and rounded to an integer.
        /// </summary>
        /// <param name="allele">The called allele whose genotype is being scored.</param>
        /// <param name="targetLimitOfDetectionVF">
        /// Variant frequency used as the detection limit; multiplied by total coverage to get
        /// the number of non-allele observations expected at that limit.
        /// </param>
        /// <param name="minGTQScore">Lower bound, also returned directly for unscorable or doubtful calls.</param>
        /// <param name="maxGTQScore">Upper bound applied before the lower bound.</param>
        /// <returns>
        /// <paramref name="minGTQScore"/> when coverage is zero, the allele is a no-call, or a
        /// homozygous call shows at least as many non-allele observations as the detection limit
        /// predicts; otherwise the bounded, rounded score.
        /// </returns>
        public static int Compute(CalledAllele allele, float targetLimitOfDetectionVF, int minGTQScore, int maxGTQScore)
        {
            // Default: the genotype quality is the variant quality unless adjusted below.
            double unboundedQ = allele.VariantQscore;

            bool nothingToScore = (allele.TotalCoverage == 0) || (allele.IsNocall);
            if (nothingToScore)
            {
                return minGTQScore;
            }

            bool isHomozygous = (allele.Genotype == Genotype.HomozygousRef)
                                || (allele.Genotype == Genotype.HomozygousAlt);

            if (isHomozygous)
            {
                // A homozygous somatic genotype is a strong statement. It requires both that
                // (A) the allele itself was found with confidence (its VariantQscore) ...
                var alleleErrorProbability = MathOperations.QtoP(allele.VariantQscore);

                // ... and (B) the chance that another allele was missed is very small, i.e. the
                // false-negative chance at the detection-limit frequency for this coverage.

                // Kept explicitly as float so no platform-dependent float -> double
                // conversion differences creep into the downstream arguments.
                float observedOtherCallsF           = (1f - allele.Frequency) * allele.TotalCoverage;
                float otherCallsAtDetectionLimitF   = targetLimitOfDetectionVF * allele.TotalCoverage;

                // Covers two situations:
                //  A) the depth is too low to ever observe a variant at the detection limit
                //     (e.g. at depth 10 a 5% variant would never be seen);
                //  B) the non-allele fraction already exceeds the limit (e.g. 6% observed vs a 5%
                //     limit), so a homozygous genotype cannot be trusted.
                if (observedOtherCallsF >= otherCallsAtDetectionLimitF)
                {
                    return minGTQScore;
                }

                var missedAlleleProbability = Poisson.Cdf(observedOtherCallsF, otherCallsAtDetectionLimitF);
                unboundedQ = MathOperations.PtoQ(alleleErrorProbability + missedAlleleProbability);
            }

            // Apply the upper bound first, then the lower bound, then round to the nearest integer.
            double boundedQ = Math.Max(Math.Min(maxGTQScore, unboundedQ), minGTQScore);
            return (int)Math.Round(boundedQ);
        }