/// <summary>
/// P-value for observing at least <paramref name="observedCallCount"/> supporting calls
/// from base-call error alone, modeled as Poisson with mean (coverage * error rate).
/// </summary>
/// <param name="observedCallCount">Number of observations supporting the variant.</param>
/// <param name="coverage">Total read depth at the locus.</param>
/// <param name="estimatedBaseCallQuality">Phred-scaled quality converted to a per-base error rate.</param>
/// <returns>P(X >= observedCallCount) under the noise model; 1.0 when nothing was observed.</returns>
public static double AssignPValue(int observedCallCount, int coverage, int estimatedBaseCallQuality)
{
    // Zero observations are trivially consistent with pure noise.
    if (observedCallCount == 0)
    {
        return 1.0;
    }

    // Phred Q -> probability that a single base call is erroneous.
    var noiseRate = MathOperations.QtoP(estimatedBaseCallQuality);
    var expectedNoiseCalls = coverage * noiseRate;

    // Survival function: 1 - P(X <= observed - 1) == P(X >= observed).
    return 1 - Poisson.Cdf(observedCallCount - 1.0, expectedNoiseCalls);
}
/// <summary>
/// Captures per-strand support statistics together with the noise / miss probabilities
/// consumed by the strand-bias metric.
/// </summary>
/// <param name="support">Observations supporting the variant on this strand.</param>
/// <param name="coverage">Total observations on this strand.</param>
/// <param name="noiseFreq">Expected frequency of noise (error) calls.</param>
/// <param name="minDetectableSNP">Smallest variant frequency the caller claims to detect.</param>
/// <param name="strandBiasModel">Which model to apply when there is zero support.</param>
public StrandBiasStats(double support, double coverage, double noiseFreq, double minDetectableSNP, StrandBiasModel strandBiasModel)
{
    Frequency = support / coverage;
    Support = support;
    Coverage = coverage;

    if (support != 0)
    {
        // Probability of at most (support - 1) noise calls, i.e. the chance that these
        // observations (or more) are not explained by noise alone.
        ChanceVarFreqGreaterThanZero = Poisson.Cdf(support - 1, coverage * noiseFreq); // used in SB metric
        ChanceFalsePos = 1 - ChanceVarFreqGreaterThanZero;                             // used in SB metric
        // Chance a real variant at the minimum detectable frequency would yield this
        // few observations or fewer (i.e. we would have under-called it).
        ChanceFalseNeg = Poisson.Cdf(support, coverage * minDetectableSNP);
    }
    else if (strandBiasModel == StrandBiasModel.Poisson)
    {
        ChanceFalsePos = 1;
        ChanceVarFreqGreaterThanZero = 0;
        ChanceFalseNeg = 0;
    }
    else if (strandBiasModel == StrandBiasModel.Extended)
    {
        // With zero support: a single read misses a variant present at minDetectableSNP
        // with probability (1 - minDetectableSNP); missing it on every one of
        // 'coverage' independent reads is that value raised to the coverage.
        // NOTE(review): despite its name, ChanceVarFreqGreaterThanZero holds the
        // miss probability in this branch — preserved as-is from the original.
        ChanceVarFreqGreaterThanZero = Math.Pow(1 - minDetectableSNP, coverage); // used in SB metric
        // Likelihood the variant truly is absent = 1 - (it exists but every read missed it).
        ChanceFalsePos = 1 - ChanceVarFreqGreaterThanZero;                       // used in SB metric
        // A low-frequency variant was at work in the model and we failed to observe it.
        ChanceFalseNeg = ChanceVarFreqGreaterThanZero;
    }

    // Terminology:
    //   Type I  error (ChanceFalsePos, the p-value): we called a SNP that is really noise.
    //   Type II error (ChanceFalseNeg): a real variant went uncalled.
}
/// <summary>
/// Derives a genotype quality (GQ) score for a called allele, clamped to
/// [<paramref name="minGTQScore"/>, <paramref name="maxGTQScore"/>].
/// </summary>
/// <param name="allele">The call being scored; its VariantQscore seeds the result.</param>
/// <param name="targetLimitOfDetectionVF">Minimum variant frequency the caller claims to detect.</param>
/// <param name="minGTQScore">Floor for the returned score.</param>
/// <param name="maxGTQScore">Ceiling for the returned score.</param>
/// <returns>The rounded, clamped genotype quality.</returns>
public static int Compute(CalledAllele allele, float targetLimitOfDetectionVF, int minGTQScore, int maxGTQScore)
{
    double qRaw = allele.VariantQscore;

    // No data, or explicitly a no-call: lowest possible confidence.
    if ((allele.TotalCoverage == 0) || (allele.IsNocall))
    {
        return minGTQScore;
    }

    if ((allele.Genotype == Genotype.HomozygousRef) || (allele.Genotype == Genotype.HomozygousAlt))
    {
        // A homozygous somatic call asserts two things:
        //   (A) the allele itself is real (captured by VariantQscore), and
        //   (B) the chance we missed any alternate allele is negligible — i.e. the
        //       false-negative probability at VF = min detectable frequency, given depth.
        var probAlleleWrong = MathOperations.QtoP(allele.VariantQscore);

        // Floats on purpose: explicit typing keeps the arithmetic identical across
        // Windows/Linux by avoiding implicit float -> double conversion differences
        // in the downstream arguments.
        float observedNonAlleleF = (1f - allele.Frequency) * allele.TotalCoverage;
        float expectedNonAlleleAtLodF = targetLimitOfDetectionVF * allele.TotalCoverage;

        // Covers two situations:
        //   (A) depth too shallow to ever reveal a just-detectable variant
        //       (e.g. at depth 10 a 5% variant would never be seen anyway), and
        //   (B) non-allele evidence at or above the minimum callable frequency,
        //       which makes a homozygous genotype indefensible.
        if (observedNonAlleleF >= expectedNonAlleleAtLodF)
        {
            return minGTQScore;
        }

        // Poisson.Cdf rather than CumulativeDistribution: the latter behaves badly
        // for values below the mean.
        var probMissedAlt = Poisson.Cdf(observedNonAlleleF, expectedNonAlleleAtLodF);

        qRaw = MathOperations.PtoQ(probAlleleWrong + probMissedAlt);
    }

    var clamped = Math.Max(Math.Min(maxGTQScore, qRaw), minGTQScore);
    return (int)Math.Round(clamped);
}