// var poissonDist = new MathNet.Numerics.Distributions.Poisson(errorRate * coverage);
/// <summary>
/// Scores an observed call count against a Poisson noise model whose mean is
/// MathOperations.QtoP(<paramref name="estimatedBaseCallQuality"/>) * <paramref name="coverage"/>.
/// </summary>
/// <param name="callCount">Observed count that is tested against the noise model.</param>
/// <param name="coverage">Multiplied by the error rate to give the Poisson mean.</param>
/// <param name="estimatedBaseCallQuality">
/// Quality value handed to MathOperations.QtoP (presumably a Phred-scaled base-call quality; confirm against MathOperations).
/// </param>
/// <returns>
/// MathOperations.PtoQ of the upper-tail probability 1 - CDF(callCount - 1) when that probability is positive;
/// otherwise an approximation computed in log space from the Poisson log-probability at callCount - 1.
/// </returns>
public static double AssignRawPoissonQScore(int callCount, int coverage, int estimatedBaseCallQuality)
// ReSharper restore InconsistentNaming
{
    var expectedNoiseCalls = MathOperations.QtoP(estimatedBaseCallQuality) * coverage;
    var noiseModel = new MathNet.Numerics.Distributions.Poisson(expectedNoiseCalls);

    int precedingCount = callCount - 1;
    double tailProbability = 1 - noiseModel.CumulativeDistribution(precedingCount);

    if (tailProbability > 0)
    {
        return MathOperations.PtoQ(tailProbability);
    }

    // The tail probability is not positive here (e.g. it underflowed to zero), so the score is
    // approximated in log space to get around the precision limit of the CDF.
    double lnPointMass = noiseModel.ProbabilityLn(precedingCount);
    double relativeExcess = (callCount - expectedNoiseCalls) / callCount;

    return -10.0 * (lnPointMass - Math.Log(2.0 * relativeExcess)) / Math.Log(10.0);
}
// Poisson.Cdf(observedCallCount - 1.0, coverage* errorRate));
/// <summary>
/// Evaluates an observed call count against a Poisson noise model with mean
/// <paramref name="noise"/> * <paramref name="depth"/>, returning both the directly computed
/// tail probability and a log-space approximate q-score.
/// </summary>
/// <param name="depth">Multiplied by <paramref name="noise"/> to give the Poisson mean.</param>
/// <param name="noise">Multiplied by <paramref name="depth"/> to give the Poisson mean.</param>
/// <param name="callCounts">Observed count that is tested against the noise model.</param>
/// <returns>
/// A two-element array: [0] is 1 - CDF(callCounts - 1); [1] is the approximate q-score derived from
/// the Poisson log-probability at (int)callCounts - 1.
/// </returns>
public double[] Triangle_AssignQValue(double depth, double noise, double callCounts)
{
    double expectedNoiseCalls = noise * depth;
    var noiseModel = new MathNet.Numerics.Distributions.Poisson(expectedNoiseCalls);

    // Directly computed upper-tail probability.
    double tailProbability = 1 - noiseModel.CumulativeDistribution(callCounts - 1.0);

    // Approximation to get around precision issues; always computed, whatever the tail probability is.
    double lnPointMass = noiseModel.ProbabilityLn((int)callCounts - 1);
    double relativeExcess = (callCounts - expectedNoiseCalls) / callCounts;
    double approximateQ = -10.0 * (lnPointMass - Math.Log(2.0 * relativeExcess)) / Math.Log(10.0);

    return new double[] { tailProbability, approximateQ };
}
/// <summary>
/// Assign a q-score for a genotype call.
/// </summary>
/// <remarks>
/// The score is 10 * log10(P(H0) / P(H1)), floored and clamped to
/// [<paramref name="minQScore"/>, <paramref name="maxQScore"/>], where H0 is the hypothesis that the called
/// genotype is correct and H1 is the competing hypothesis. Only <c>Genotype.HemizygousRef</c> and
/// <c>Genotype.HemizygousAlt</c> are modeled; any other genotype gets <paramref name="minQScore"/>.
/// </remarks>
/// <param name="allele">Called allele; its Genotype, TotalCoverage and AlleleSupport are read.</param>
/// <param name="minQScore">Lower bound of the result; also returned for zero coverage and unmodeled genotypes.</param>
/// <param name="maxQScore">Upper bound of the result; also returned when H1 is effectively impossible.</param>
/// <returns>The clamped genotype q-score.</returns>
public static int Compute(CalledAllele allele, int minQScore, int maxQScore)
{
    if (allele.TotalCoverage == 0)
    {
        return minQScore;
    }

    //parameters
    float noiseHomRef = 0.05f;
    float noiseHomAlt = 0.075f;
    float expectedHetFreq = 0.40f; //a real 50% typically shows up at <50%, more like 40% or 45%

    float depth = (float)allele.TotalCoverage;

    //distributions
    var poissonHomRefNoise = new MathNet.Numerics.Distributions.Poisson(noiseHomRef * depth);
    var poissonHomAltNoise = new MathNet.Numerics.Distributions.Poisson(noiseHomAlt * depth);
    var binomialHomAltExpected = new MathNet.Numerics.Distributions.Binomial(expectedHetFreq, allele.TotalCoverage);

    var nonAlleleCalls = Math.Max(allele.TotalCoverage - allele.AlleleSupport, 0); //sanitize for funny insertion cases

    double lnPofH0GT; //H0 is the null hypothesis. The working assumption that the GT given to the allele is correct
    double lnPofH1GT; //H1 is the alternate hypothesis. The possibility that H0 is wrong, and the second-best GT was actually the right one

    //the GT Q model measures how much *more* likely H0 is than H1, given the observations.
    switch (allele.Genotype)
    {
        case Genotype.HemizygousRef:
            lnPofH0GT = poissonHomRefNoise.ProbabilityLn(nonAlleleCalls);
            lnPofH1GT = binomialHomAltExpected.ProbabilityLn(nonAlleleCalls);
            break;

        case Genotype.HemizygousAlt:
            lnPofH0GT = poissonHomAltNoise.ProbabilityLn(nonAlleleCalls);
            lnPofH1GT = binomialHomAltExpected.ProbabilityLn(allele.AlleleSupport);
            break;

        default:
            return minQScore;
    }

    // A log-probability can be hugely negative or negative infinity (presumably e.g. when AlleleSupport
    // exceeds TotalCoverage -- confirm against the distribution library). Converting an infinite or NaN
    // double to int is unspecified, so settle those cases before any conversion happens.
    if ((lnPofH1GT <= int.MinValue) && (lnPofH0GT > lnPofH1GT)) //H0 infinitely more likely
    {
        return maxQScore;
    }

    if ((lnPofH0GT <= int.MinValue) && (lnPofH0GT < lnPofH1GT)) //H1 infinitely more likely
    {
        return minQScore;
    }

    //Log10(A/B) = Log10(e) * (ln(A) - ln(B))
    double rawQScore = Math.Floor(10.0 * Math.Log10(Math.E) * (lnPofH0GT - lnPofH1GT));

    if (double.IsNaN(rawQScore)) //neither hypothesis can be evaluated, so there is no evidence for the call
    {
        return minQScore;
    }

    // Clamp while still a double so the final conversion to int is always in range.
    return (int)Math.Max(Math.Min(rawQScore, maxQScore), minQScore);
}
/// <summary>
/// Assign a q-score for a genotype call.
/// </summary>
/// <remarks>
/// The score is 10 * log10(P(H0) / P(H1)), floored and clamped to
/// [<paramref name="minQScore"/>, <paramref name="maxQScore"/>], where H0 is the hypothesis that the called
/// genotype is correct and H1 is the hypothesis that the second-best genotype was actually the right one.
/// Homozygous and heterozygous genotypes are modeled; any other genotype gets <paramref name="minQScore"/>.
/// </remarks>
/// <param name="allele">Called allele; its Genotype, TotalCoverage, AlleleSupport and Frequency are read.</param>
/// <param name="minQScore">Lower bound of the result; also returned for zero coverage and unmodeled genotypes.</param>
/// <param name="maxQScore">Upper bound of the result; also returned when H1 is effectively impossible.</param>
/// <returns>The clamped genotype q-score.</returns>
public static int Compute(CalledAllele allele, int minQScore, int maxQScore)
{
    if (allele.TotalCoverage == 0)
    {
        return minQScore;
    }

    //parameters
    float noiseHomRef = 0.05f;
    float noiseHomAlt = 0.075f;
    float noiseHetAlt = 0.10f;
    float expectedHetFreq = 0.40f; //a real 50% typically shows up at <50%, more like 40% or 45%

    float depth = (float)allele.TotalCoverage;

    //distributions
    var poissonHomRefNoise = new MathNet.Numerics.Distributions.Poisson(noiseHomRef * depth);
    var poissonHomAltNoise = new MathNet.Numerics.Distributions.Poisson(noiseHomAlt * depth);
    var binomialHomAltExpected = new MathNet.Numerics.Distributions.Binomial(expectedHetFreq, allele.TotalCoverage);
    var binomialHomRefNoise = new MathNet.Numerics.Distributions.Binomial(noiseHetAlt, allele.TotalCoverage);
    var binomialHomAltNoise = new MathNet.Numerics.Distributions.Binomial((1 - noiseHetAlt), allele.TotalCoverage);

    var nonAlleleCalls = Math.Max(allele.TotalCoverage - allele.AlleleSupport, 0); //sanitize for funny insertion cases

    double lnPofH0GT; //H0 is the null hypothesis. The working assumption that the GT given to the allele is correct
    double lnPofH1GT; //H1 is the alternate hypothesis. The possibility that H0 is wrong, and the second-best GT was actually the right one

    //the GT Q model measures how much *more* likely H0 is than H1, given the observations.
    switch (allele.Genotype)
    {
        case Genotype.HomozygousRef:
            lnPofH0GT = poissonHomRefNoise.ProbabilityLn(nonAlleleCalls);
            lnPofH1GT = binomialHomAltExpected.ProbabilityLn(nonAlleleCalls);
            break;

        case Genotype.HomozygousAlt:
            lnPofH0GT = poissonHomAltNoise.ProbabilityLn(nonAlleleCalls);
            lnPofH1GT = binomialHomAltExpected.ProbabilityLn(allele.AlleleSupport);
            break;

        case Genotype.HeterozygousAlt1Alt2:
        case Genotype.HeterozygousAltRef:
            // Both hypotheses are evaluated at the same truncated count, depth * Frequency.
            int observedCalls = (int)(depth * allele.Frequency);
            lnPofH0GT = binomialHomAltExpected.ProbabilityLn(observedCalls);

            if (allele.Frequency >= 0.50)
            {
                //test alternate GT as being homAlt
                lnPofH1GT = binomialHomAltNoise.ProbabilityLn(observedCalls);
            }
            else
            {
                //test alternate GT as being homRef
                lnPofH1GT = binomialHomRefNoise.ProbabilityLn(observedCalls);
            }

            break;

        default:
            return minQScore;
    }

    // A log-probability can be hugely negative or negative infinity. Converting an infinite or NaN
    // double to int is unspecified, so settle those cases before any conversion happens.
    if ((lnPofH1GT <= int.MinValue) && (lnPofH0GT > lnPofH1GT)) //H0 infinitely more likely
    {
        return maxQScore;
    }

    if ((lnPofH0GT <= int.MinValue) && (lnPofH0GT < lnPofH1GT)) //H1 infinitely more likely
    {
        return minQScore;
    }

    //note, Ln(X) = Log10(X) / Log10(e).
    // -> Log10(A) - Log10(B) = Log10(e) * (ln(A) - ln(B)) = Log10(A/B)
    double rawQScore = Math.Floor(10.0 * Math.Log10(Math.E) * (lnPofH0GT - lnPofH1GT));

    if (double.IsNaN(rawQScore)) //neither hypothesis can be evaluated, so there is no evidence for the call
    {
        return minQScore;
    }

    // Clamp while still a double so the final conversion to int is always in range.
    return (int)Math.Max(Math.Min(rawQScore, maxQScore), minQScore);
}