// var poissonDist = new MathNet.Numerics.Distributions.Poisson(errorRate * coverage);

// Computes an uncapped Q score for seeing at least `callCount` calls out of `coverage`
// observations, under a Poisson noise model whose mean is (error rate * coverage).
//
//   callCount                - number of observations supporting the call.
//   coverage                 - total number of observations at the position.
//   estimatedBaseCallQuality - base-call quality; converted to an error rate via MathOperations.QtoP.
//
// Returns MathOperations.PtoQ of the upper-tail probability P(X >= callCount) when that
// probability is representable as a positive double; otherwise returns a Q score derived
// from the log of the point probability (see the fallback below).
// NOTE(review): coverage == 0 yields a mean of 0 and callCount == 0 makes the fallback divide
// by zero; how MathNet and callers handle those inputs is not visible here - confirm.
public static double AssignRawPoissonQScore(int callCount, int coverage, int estimatedBaseCallQuality)
// ReSharper restore InconsistentNaming
{
    double perBaseErrorRate = MathOperations.QtoP(estimatedBaseCallQuality);
    int oneBelowCallCount = callCount - 1;

    // Expected number of erroneous calls at this depth.
    double expectedErrorCalls = perBaseErrorRate * coverage;
    var noiseModel = new MathNet.Numerics.Distributions.Poisson(expectedErrorCalls);

    // Upper tail: 1 - P(X <= callCount - 1).
    double upperTail = 1 - noiseModel.CumulativeDistribution(oneBelowCallCount);
    if (upperTail > 0)
    {
        return MathOperations.PtoQ(upperTail);
    }

    // The tail underflowed to zero (or below), so 1 - CDF cannot be converted directly.
    // Approximate in log space instead, starting from ln P(X = callCount - 1).
    double lnPointProbability = noiseModel.ProbabilityLn(oneBelowCallCount);
    double relativeExcess = (callCount - expectedErrorCalls) / callCount;

    // -10 * log10(...) expressed with natural logs: divide by ln(10).
    return -10.0 * (lnPointProbability - Math.Log(2.0 * relativeExcess)) / Math.Log(10.0);
}
/// <summary>
/// Computes the probability of seeing at least <paramref name="observedCallCount"/> calls
/// under a Poisson model whose mean is <paramref name="coverage"/> times the error rate
/// obtained from <paramref name="estimatedBaseCallQuality"/>.
/// </summary>
/// <param name="observedCallCount">Number of observations supporting the call.</param>
/// <param name="coverage">Total number of observations at the position.</param>
/// <param name="estimatedBaseCallQuality">Base-call quality, converted to an error rate via <c>MathOperations.QtoP</c>.</param>
/// <returns>
/// 1.0 when nothing was observed; otherwise 1 minus <c>Poisson.Cdf</c> evaluated at
/// <paramref name="observedCallCount"/> - 1.
/// </returns>
public static double AssignPValue(int observedCallCount, int coverage, int estimatedBaseCallQuality)
{
    double perBaseErrorRate = MathOperations.QtoP(estimatedBaseCallQuality);

    // Zero observations: "at least zero" is certain, so skip the CDF entirely.
    return observedCallCount == 0
        ? 1.0
        : 1 - Poisson.Cdf(observedCallCount - 1.0, coverage * perBaseErrorRate);
}
/// <summary>
/// Computes a genotype Q score for <paramref name="allele"/>, clamped to
/// [<paramref name="minGTQScore"/>, <paramref name="maxGTQScore"/>] and rounded to an integer.
/// </summary>
/// <param name="allele">The called allele; its VariantQscore, TotalCoverage, IsNocall, Genotype and Frequency are read.</param>
/// <param name="targetLimitOfDetectionVF">Minimum variant frequency that should be detectable.</param>
/// <param name="minGTQScore">Lower bound of the result; also returned for no-calls, zero coverage, and unsafe homozygous calls.</param>
/// <param name="maxGTQScore">Upper bound of the result.</param>
/// <returns>The clamped, rounded genotype Q score.</returns>
public static int Compute(CalledAllele allele, float targetLimitOfDetectionVF, int minGTQScore, int maxGTQScore)
{
    // Default for non-homozygous genotypes: the variant Q score itself.
    double uncappedQ = allele.VariantQscore;

    if (allele.TotalCoverage == 0 || allele.IsNocall)
    {
        return minGTQScore;
    }

    var genotype = allele.Genotype;
    bool isHomozygous = genotype == Genotype.HomozygousRef || genotype == Genotype.HomozygousAlt;

    if (isHomozygous)
    {
        // A homozygous somatic genotype is a fairly strong statement. It implies both that
        // (A) we found the allele for sure (the VariantQscore), and
        // (B) the chance that we missed any alternate calls is very small, i.e. the chance of
        //     a false negative given VF = min frequency at the given coverage.
        var pAlleleWrong = MathOperations.QtoP(allele.VariantQscore);

        // Explicitly typed as float to prevent any win/linux differences sneaking in through
        // float -> double conversions inside downstream arguments.
        float observedOtherCallsF = (1f - allele.Frequency) * allele.TotalCoverage;
        float expectedOtherCallsF = targetLimitOfDetectionVF * allele.TotalCoverage;

        // This covers two cases:
        // (A) there is not enough depth to ever observe a non-ref variant; e.g. at depth 10
        //     we would never see a 5% variant anyway.
        // (B) we see, say, 6% non-reference, above a 5% minimum safe variant-call frequency,
        //     so we are too worried to call this a confident 0/0 genotype.
        if (observedOtherCallsF >= expectedOtherCallsF)
        {
            return minGTQScore;
        }

        // poissonDist.CumulativeDistribution(nonRefObservations) was not used here because
        // that method does badly for values lower than the mean.
        var pMissedAlternate = Poisson.Cdf(observedOtherCallsF, expectedOtherCallsF);

        uncappedQ = MathOperations.PtoQ(pAlleleWrong + pMissedAlternate);
    }

    // Cap at the maximum first, then raise to the minimum.
    double boundedQ = Math.Max(Math.Min(maxGTQScore, uncappedQ), minGTQScore);
    return (int)Math.Round(boundedQ);
}