Ejemplo n.º 1
0
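        /// <summary>
        ///     Computes a strand-bias score modeled on the GATK Unified Genotyper "SB" annotation.
        ///     Reference (see the section on Strand Bias):
        ///     http://www.broadinstitute.org/gsa/wiki/index.php/Understanding_the_Unified_Genotyper%27s_VCF_files
        /// </summary>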
        // From GATK source:
        //double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
        //double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
        //
        //// strand score is max bias between forward and reverse strands
        //double strandScore = Math.max(forwardLod, reverseLod);
        //
        //// rescale by a factor of 10
        //strandScore *= 10.0;
        //
        //attributes.put("SB", strandScore);
        private static double[] AssignBiasScore(StrandBiasStats overallStats, StrandBiasStats fwdStats, StrandBiasStats rvsStats)
        {
            // Returns a two-element array: the raw bias value (the larger of the forward and
            // reverse ratios), followed by that value passed through MathOperations.PtoGATKBiasScale.

            // Numerators pair each strand's ChanceVarFreqGreaterThanZero with the opposite strand's ChanceFalsePos.
            var forwardNumerator = fwdStats.ChanceVarFreqGreaterThanZero * rvsStats.ChanceFalsePos;
            var reverseNumerator = rvsStats.ChanceVarFreqGreaterThanZero * fwdStats.ChanceFalsePos;
            var overallChance = overallStats.ChanceVarFreqGreaterThanZero;

            // A zero denominator makes the ratios undefined; both biases are then fixed at 1.
            var denominatorIsZero = overallChance == 0;
            var forwardBias = denominatorIsZero ? 1 : forwardNumerator / overallChance;
            var reverseBias = denominatorIsZero ? 1 : reverseNumerator / overallChance;

            // The reported score is the worse (larger) of the two strand biases.
            var p = Math.Max(forwardBias, reverseBias);

            return new[] { p, MathOperations.PtoGATKBiasScale(p) };
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Creates a new <see cref="StrandBiasStats"/> carrying the same values as <paramref name="originalStats"/>.
        /// </summary>
        /// <param name="originalStats">The instance to copy; may be null.</param>
        /// <returns>A separate instance with the same values, or null when the input is null.</returns>
        public static StrandBiasStats DeepCopy(StrandBiasStats originalStats)
        {
            // Nothing to copy.
            if (originalStats == null)
            {
                return null;
            }

            // Support and coverage go through the constructor; the remaining values are copied one by one.
            var copy = new StrandBiasStats(originalStats.Support, originalStats.Coverage);

            copy.ChanceFalseNeg = originalStats.ChanceFalseNeg;
            copy.ChanceFalsePos = originalStats.ChanceFalsePos;
            copy.ChanceVarFreqGreaterThanZero = originalStats.ChanceVarFreqGreaterThanZero;
            copy.Frequency = originalStats.Frequency;

            return copy;
        }
Ejemplo n.º 3
0
        /// <summary>
        ///     Fills in ChanceVarFreqGreaterThanZero, ChanceFalsePos and ChanceFalseNeg on <paramref name="stats"/>
        ///     from its Support and Coverage values.
        /// </summary>
        /// <param name="stats">The stats object to update in place.</param>
        /// <param name="noiseFreq">Per-observation noise rate; scaled by coverage to form the Poisson mean for the noise model.</param>
        /// <param name="minDetectableSNP">Minimum detectable variant frequency used for the false-negative estimate.</param>
        /// <param name="strandBiasModel">Selects how the zero-support case is handled (Poisson or Extended).</param>
        public static void PopulateStats(StrandBiasStats stats, double noiseFreq, double minDetectableSNP,
                                         StrandBiasModel strandBiasModel)
        {
            // Background:
            //
            // A Type 1 error rejects the null hypothesis when it is true (there is only noise, but a SNP was called).
            // A Type 2 error fails to reject the null hypothesis when it is false (there is a variant, but it was not called).
            //
            // ChanceFalsePos is the Type 1 error (the p-value); ChanceFalseNeg is the Type 2 error.

            if (stats.Support != 0)
            {
                // Chance of seeing fewer than the observed support under the noise model.
                stats.ChanceVarFreqGreaterThanZero = Poisson.Cdf(stats.Support - 1, stats.Coverage * noiseFreq); //used in SB metric
                stats.ChanceFalsePos = 1 - stats.ChanceVarFreqGreaterThanZero;                                   //used in SB metric

                // Chance of seeing the observed support or less if a variant at the minimum detectable frequency were present.
                stats.ChanceFalseNeg = Poisson.Cdf(stats.Support, stats.Coverage * minDetectableSNP);
                return;
            }

            // From here on there are no supporting observations.
            if (strandBiasModel == StrandBiasModel.Poisson)
            {
                stats.ChanceFalsePos = 1;
                stats.ChanceVarFreqGreaterThanZero = 0;
                stats.ChanceFalseNeg = 0;
                return;
            }

            if (strandBiasModel == StrandBiasModel.Extended)
            {
                // One observation shows the SNP with probability minDetectableSNP and misses it with
                // probability (1 - minDetectableSNP); missing it on every one of Coverage observations is:
                stats.ChanceVarFreqGreaterThanZero = Math.Pow(1 - minDetectableSNP, stats.Coverage); //used in SB metric

                // Likelihood that the variant really does not exist
                // = 1 - chance that it exists but was never observed.
                stats.ChanceFalsePos = 1 - stats.ChanceVarFreqGreaterThanZero; //used in SB metric

                // Chance that a low-frequency variant is present and simply went unobserved.
                stats.ChanceFalseNeg = stats.ChanceVarFreqGreaterThanZero;
            }

            // Any other model leaves the stats untouched when support is zero.
        }
Ejemplo n.º 4
0
        /// <summary>
        ///     Creates a new <see cref="StrandBiasResults"/> carrying the same values as
        ///     <paramref name="originalSBresults"/>; each nested stats object is copied through StrandBiasStats.DeepCopy.
        /// </summary>
        /// <param name="originalSBresults">The instance to copy; may be null.</param>
        /// <returns>A separate instance with the same values, or null when the input is null.</returns>
        public static StrandBiasResults DeepCopy(StrandBiasResults originalSBresults)
        {
            // Nothing to copy.
            if (originalSBresults == null)
            {
                return null;
            }

            var copy = new StrandBiasResults();

            // Scores and flags are assigned directly.
            copy.BiasAcceptable = originalSBresults.BiasAcceptable;
            copy.BiasScore = originalSBresults.BiasScore;
            copy.GATKBiasScore = originalSBresults.GATKBiasScore;
            copy.VarPresentOnBothStrands = originalSBresults.VarPresentOnBothStrands;
            copy.CovPresentOnBothStrands = originalSBresults.CovPresentOnBothStrands;
            copy.TestAcceptable = originalSBresults.TestAcceptable;
            copy.TestScore = originalSBresults.TestScore;

            // Nested stats get their own copies so the result does not share them with the source.
            copy.ForwardStats = StrandBiasStats.DeepCopy(originalSBresults.ForwardStats);
            copy.OverallStats = StrandBiasStats.DeepCopy(originalSBresults.OverallStats);
            copy.ReverseStats = StrandBiasStats.DeepCopy(originalSBresults.ReverseStats);
            copy.StitchedStats = StrandBiasStats.DeepCopy(originalSBresults.StitchedStats);

            return copy;
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Fills in ChanceFalseNeg, ChanceFalsePos and ChanceVarFreqGreaterThanZero on <paramref name="stats"/>
        ///     by comparing its observed frequency against <paramref name="minDetectableSNP"/>.
        /// </summary>
        /// <param name="stats">The stats object to update in place.</param>
        /// <param name="noiseFreq">Not read by this method (see the review note in the body).</param>
        /// <param name="minDetectableSNP">Frequency threshold, also used as the success probability of the binomial model.</param>
        public static void PopulateDiploidStats(StrandBiasStats stats, double noiseFreq, double minDetectableSNP)
        {
            // NOTE(review): noiseFreq is never used; the noise model below applies a fixed rate instead — confirm this is intended.
            const double fixedNoiseRate = 0.1;

            // Shortcut: the observed frequency already meets the threshold, so no modelling is needed.
            if (stats.Frequency >= minDetectableSNP)
            {
                stats.ChanceFalseNeg = 1; // dropping this call would be a false negative
                stats.ChanceFalsePos = 0; // keeping this call would not be a false positive
                stats.ChanceVarFreqGreaterThanZero = 1;
                return;
            }

            // Harder case: the variant is barely seen, possibly because there are too few reads.
            // Model the alt observations as binomial draws at the threshold frequency over the coverage.
            var hetAltModel = new MathNet.Numerics.Distributions.Binomial(minDetectableSNP, (int)stats.Coverage);

            // If this were a variant at the threshold frequency, how likely is support this low or lower?
            // (A false negative if the call were filtered.)
            var chanceSupportThisLow = hetAltModel.CumulativeDistribution(stats.Support);
            stats.ChanceFalseNeg = Math.Max(chanceSupportThisLow, 0);

            // How likely is noise alone to produce more than this much support?
            // (A false positive if the call were kept.)
            var chanceNoiseAtOrBelow = Poisson.Cdf(stats.Support, stats.Coverage * fixedNoiseRate);
            stats.ChanceFalsePos = Math.Max(0.0, 1 - chanceNoiseAtOrBelow);

            stats.ChanceVarFreqGreaterThanZero = stats.ChanceFalseNeg;
        }
        // Exercises StrandBiasCalculator.PopulateDiploidStats over clear-cut, borderline,
        // pathological and depth-dependent inputs. Each case lists:
        //   #observations, coverage, expected ChanceFalseNeg, expected ChanceFalsePos, expected ChanceVarFreqGreaterThanZero
        public void TestPopulateDiploidStats()
        {
            //Cases where the variant obviously exists
            AssertDiploidStats(100, 100, 1, 0, 1);
            AssertDiploidStats(50, 100, 1, 0, 1);
            AssertDiploidStats(20, 100, 1, 0, 1);

            //Cases where the variant becomes less obvious.
            //ChanceFalseNeg: chance this is a real variant (a false neg if we filtered it).
            //ChanceFalsePos: chance this is due to noise (a false pos if we left it in).
            AssertDiploidStats(15, 100, 0.129, 0.049, 0.129); //could still be real; noise is an unlikely explanation
            AssertDiploidStats(10, 100, 0.006, 0.417, 0.006); //unlikely to be real; noise is a plausible explanation
            AssertDiploidStats(1, 100, 0, 1, 0);              //looks entirely like noise

            //a few pathological cases
            AssertDiploidStats(0, 100, 0, 1, 0);
            AssertDiploidStats(10, 0, 1, 0, 1);
            AssertDiploidStats(0, 0, 1, 0, 1); //not a meaningful answer, but at least nothing explodes.
            AssertDiploidStats(101, 100, 1, 0, 1);

            //check it reacts properly to depth. Ie, a 15% variant in N of 20 isn't a big deal,
            //but a 15% variant in N of 100000 seems rather low.

            //note, the believability of this variant goes up from 0.129,
            //but it is also more possible to be noise. Basically, the whole picture is more murky
            AssertDiploidStats(20.0 * 0.15, 20, 0.411, 0.143, 0.411);

            AssertDiploidStats(15, 100, 0.129, 0.049, 0.129);

            //slightly more likely to be a variant than noise, but neither hypothesis fits.
            AssertDiploidStats(500.0 * 0.15, 500, 0.002, 0, 0.002);

            //it doesn't look like noise or a variant. no hypothesis is reasonable.
            AssertDiploidStats(100000.0 * 0.15, 100000, 0, 0, 0);
        }

        // Builds stats for the given support/coverage, runs PopulateDiploidStats with the
        // noise frequency and diploid threshold used throughout this test, and checks the three
        // resulting values to 3 decimal places. Expected values are passed first to Assert.Equal
        // so that failure messages label expected and actual correctly.
        private static void AssertDiploidStats(double support, double coverage, double expectedChanceFalseNeg,
                                               double expectedChanceFalsePos, double expectedChanceVarFreqGreaterThanZero)
        {
            const double noiseFreq        = 0.01;
            const double diploidThreshold = 0.20;
            const int    precision        = 3;

            var stats = new StrandBiasStats(support, coverage); //#observations, coverage

            StrandBiasCalculator.PopulateDiploidStats(stats, noiseFreq, diploidThreshold);

            Assert.Equal(expectedChanceFalseNeg, stats.ChanceFalseNeg, precision);
            Assert.Equal(expectedChanceFalsePos, stats.ChanceFalsePos, precision);
            Assert.Equal(expectedChanceVarFreqGreaterThanZero, stats.ChanceVarFreqGreaterThanZero, precision);
        }