/// <summary>
/// Test helper: derives per-direction support counts from (frequency, coverage) pairs,
/// runs <c>StrandBiasCalculator.Compute</c> on a fresh SNV variant, checks that the
/// forward and reverse support each include half of the stitched support, and returns
/// the computed strand bias results.
/// </summary>
/// <param name="forwardStats">Item1 is multiplied by Item2 (the forward coverage) to get forward support.</param>
/// <param name="reverseStats">Item1 is multiplied by Item2 (the reverse coverage) to get reverse support.</param>
/// <param name="stitchedStats">Item1 is multiplied by Item2 (the stitched coverage) to get stitched support.</param>
/// <param name="estimatedBaseCallQuality">Passed through to the calculator.</param>
/// <param name="threshold">Passed through to the calculator.</param>
/// <param name="model">Passed through to the calculator.</param>
/// <returns>The <c>StrandBiasResults</c> set on the variant by the calculator.</returns>
private StrandBiasResults ExecuteTest(Tuple<double, int> forwardStats, Tuple<double, int> reverseStats, Tuple<double, int> stitchedStats,
    int estimatedBaseCallQuality = 20, float threshold = 0.5f, StrandBiasModel model = StrandBiasModel.Poisson)
{
    // Support = Item1 * Item2, truncated toward zero by the int cast.
    int forwardSupport = (int)(forwardStats.Item1 * forwardStats.Item2);
    int reverseSupport = (int)(reverseStats.Item1 * reverseStats.Item2);
    int stitchedSupport = (int)(stitchedStats.Item1 * stitchedStats.Item2);

    int[] supportByDirection = { forwardSupport, reverseSupport, stitchedSupport };
    int[] coverageByDirection = { forwardStats.Item2, reverseStats.Item2, stitchedStats.Item2 };

    var variant = new CalledVariant(AlleleCategory.Snv);
    variant.TotalCoverageByDirection = coverageByDirection;

    StrandBiasCalculator.Compute(variant, supportByDirection, estimatedBaseCallQuality, threshold, model);

    // The test expects the stitched support to be split evenly between the two directions.
    float halfOfStitched = (float)stitchedSupport / 2;
    var computed = variant.StrandBiasResults;
    Assert.Equal(forwardSupport + halfOfStitched, computed.ForwardStats.Support);
    Assert.Equal(reverseSupport + halfOfStitched, variant.StrandBiasResults.ReverseStats.Support);

    return variant.StrandBiasResults;
}
/// <summary>
/// Computes coverage for the variant and, when it has any allele support, its quality
/// and strand bias; then hands the variant to <c>AlleleProcessor.Process</c> for
/// genotype/filter assignment.
/// </summary>
/// <param name="source">Allele source passed to the coverage calculator; its ExpectStitchedReads value is forwarded to the allele processor.</param>
/// <param name="variant">The called allele to annotate; passed to each calculator and to the allele processor.</param>
private void ProcessVariant(IAlleleSource source, CalledAllele variant)
{
    // determine metrics
    _coverageCalculator.Compute(variant, source);

    bool hasSupport = variant.AlleleSupport > 0;
    if (hasSupport)
    {
        // Under the Window noise model the quality passed in is derived from this variant's own
        // SumOfBaseQuality / TotalCoverage; otherwise the configured estimate is used.
        var qualityForScoring = _config.NoiseModel == NoiseModel.Window
            ? (int)MathOperations.PtoQ(variant.SumOfBaseQuality / variant.TotalCoverage)
            : _config.EstimatedBaseCallQuality;

        VariantQualityCalculator.Compute(variant, _config.MaxVariantQscore, qualityForScoring);

        // Strand bias always uses the configured estimate, regardless of noise model.
        StrandBiasCalculator.Compute(
            variant,
            variant.SupportByDirection,
            _config.EstimatedBaseCallQuality,
            _config.StrandBiasFilterThreshold,
            _config.StrandBiasModel);
    }

    // set genotype, filter, etc
    AlleleProcessor.Process(
        variant,
        _config.MinFrequency,
        _config.LowDepthFilter,
        _config.VariantQscoreFilterThreshold,
        _config.FilterSingleStrandVariants,
        _config.VariantFreqFilter,
        _config.LowGTqFilter,
        _config.IndelRepeatFilter,
        _config.RMxNFilterSettings,
        _config.ChrReference,
        source.ExpectStitchedReads);
}
/// <summary>
/// Produces the probe-pool bias scores for a consensus call built from two pools (A and B),
/// adding <c>FilterType.PoolBias</c> to the consensus filters when bias is detected.
/// </summary>
/// <param name="Case">How the two pools compare; selects which branch below is taken.</param>
/// <param name="Consensus">Consensus allele; its NoiseLevelApplied is read and its Filters may be appended to.</param>
/// <param name="ProbePoolBiasThreshold">Threshold forwarded to the strand bias calculation.</param>
/// <param name="variantCallingParameters">Supplies MinimumFrequency for the bias calculation.</param>
/// <param name="AltCountA">Alt support in pool A.</param>
/// <param name="AltCountB">Alt support in pool B.</param>
/// <param name="DepthA">Coverage in pool A.</param>
/// <param name="DepthB">Coverage in pool B.</param>
/// <param name="Genotype">Not read by this method.</param>
/// <param name="AltChangeToRef">When true, the "no bias" scores are returned immediately.</param>
/// <returns>
/// A new <c>BiasResults</c> with GATKBiasScore and BiasScore set. "No bias" is (-100, 0);
/// the disagreement cases yield (0, 1); the agreed-on-alternate case yields the calculated
/// scores with GATKBiasScore limited to [-100, 0] and BiasScore limited to at most 1.
/// </returns>
private static BiasResults GetProbePoolBiasScore(VariantComparisonCase Case, CalledAllele Consensus, float ProbePoolBiasThreshold, VariantCallingParameters variantCallingParameters, int AltCountA, int AltCountB, int DepthA, int DepthB, Genotype Genotype, bool AltChangeToRef)
{
    const double noBiasPScore = 0;
    const double noBiasGatkScore = -100;

    // Read up front, before any branching, exactly as the scores depend on it only later.
    int noiseLevel = Consensus.NoiseLevelApplied;
    var poolBias = new BiasResults();

    if (AltChangeToRef || Case == VariantComparisonCase.AgreedOnReference)
    {
        poolBias.GATKBiasScore = noBiasGatkScore;
        poolBias.BiasScore = noBiasPScore;
        return poolBias;
    }

    switch (Case)
    {
        case VariantComparisonCase.OneReferenceOneAlternate:
        case VariantComparisonCase.CanNotCombine:
            // The pools disagree: flag the call and report the maximum-bias scores.
            Consensus.Filters.Add(FilterType.PoolBias);
            poolBias.GATKBiasScore = 0;
            poolBias.BiasScore = 1;
            return poolBias;

        case VariantComparisonCase.AgreedOnAlternate:
        {
            // The two pools occupy the first two slots of the calculator's three-element
            // inputs; the third slot is left at zero.
            int[] supportByPool = new int[] { AltCountA, AltCountB, 0 };
            int[] covByPool = new int[] { DepthA, DepthB, 0 };

            BiasResults calculated = StrandBiasCalculator.CalculateStrandBiasResults(
                covByPool,
                supportByPool,
                noiseLevel,
                variantCallingParameters.MinimumFrequency,
                ProbePoolBiasThreshold,
                StrandBiasModel.Extended);

            // Cap at the upper bound 0, then at the lower bound -100.
            double gatkScore = Math.Min(0, calculated.GATKBiasScore);
            gatkScore = Math.Max(-100, gatkScore);

            // The p-score is capped at 1.
            double pScore = Math.Min(1, calculated.BiasScore);

            if (!calculated.BiasAcceptable)
            {
                Consensus.Filters.Add(FilterType.PoolBias);
            }

            poolBias.GATKBiasScore = gatkScore;
            poolBias.BiasScore = pScore;
            return poolBias;
        }
    }

    // Any other comparison case falls through to the "no bias" scores.
    poolBias.GATKBiasScore = noBiasGatkScore;
    poolBias.BiasScore = noBiasPScore;
    return poolBias;
}
/// <summary>
/// Checks the strand bias scores produced for a fixed coverage/support input under the
/// Poisson model: BiasScore is expected to be 1.0 and GATKBiasScore 0.
/// </summary>
public void TestSBCalculationsForForcedVariants()
{
    int[] coverageByStrandDirection = { 70038, 65998, 0 }; // forward, reverse, stitched
    int[] supportByStrandDirection = { 54, 11, 0 };

    BiasResults strandBias = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection,
        supportByStrandDirection,
        20,
        0.01,
        0.5,
        StrandBiasModel.Poisson);

    // Expected value first, actual second, so a failure message reports them the right way round.
    Assert.Equal(1.0, strandBias.BiasScore);
    Assert.Equal(0.0, strandBias.GATKBiasScore);
}
/// <summary>
/// Runs the coverage, quality and strand bias calculators on the variant, in that order,
/// then passes it to <c>AlleleProcessor.Process</c> for genotype/filter assignment.
/// </summary>
/// <param name="source">State manager handed to the coverage calculator.</param>
/// <param name="variant">The called allele to annotate; passed to each calculator and to the allele processor.</param>
private void ProcessVariant(IStateManager source, BaseCalledAllele variant)
{
    var settings = _config;

    // determine metrics
    CoverageCalculator.Compute(variant, source);

    QualityCalculator.Compute(
        variant,
        settings.MaxVariantQscore,
        settings.EstimatedBaseCallQuality);

    StrandBiasCalculator.Compute(
        variant,
        variant.SupportByDirection,
        settings.EstimatedBaseCallQuality,
        settings.StrandBiasFilterThreshold,
        settings.StrandBiasModel);

    // set genotype, filter, etc
    AlleleProcessor.Process(
        variant,
        settings.GenotypeModel,
        settings.MinFrequency,
        settings.MinCoverage,
        settings.VariantQscoreFilterThreshold,
        settings.FilterSingleStrandVariants);
}
/// <summary>
/// Compares strand bias results for the same inputs under two configurations:
/// "somatic" (Extended model, 0.01 frequency argument) and "diploid" (Diploid model,
/// 0.20 frequency argument). Three scenarios are covered: equal support on both strands,
/// support that differs at the 5% vs 25% level, and support that differs at the
/// 2% vs 0.5% level.
/// </summary>
public void TestSBCalculationsForSomaticAndDiploidSettings()
{
    double fwdCov = 10000;
    double revCov = 10000;
    double testVariantFreqA = 0.05;
    double testVariantFreqB = 0.25;
    double testVariantFreqC = 0.020;
    double testVariantFreqD = 0.005;

    var coverageByStrandDirection = new int[] { (int)fwdCov, (int)revCov, 0 }; // forward, reverse, stitched
    var equalSupportByStrandDirection = new int[] { (int)(fwdCov * testVariantFreqB), (int)(revCov * testVariantFreqB), 0 };

    // happy path, no bias
    BiasResults somatic = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, equalSupportByStrandDirection, 20, 0.01, 0.5, StrandBiasModel.Extended);
    BiasResults diploid = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, equalSupportByStrandDirection, 20, 0.20, 0.5, StrandBiasModel.Diploid);

    Assert.Equal(0.0, somatic.BiasScore);
    Assert.Equal(double.NegativeInfinity, somatic.GATKBiasScore);
    Assert.True(somatic.BiasAcceptable);

    Assert.Equal(0.0, diploid.BiasScore);
    Assert.Equal(double.NegativeInfinity, diploid.GATKBiasScore);
    Assert.True(diploid.BiasAcceptable);

    // bias if you are looking for a 20% variant (only one side is sufficient to call),
    // but not biased in the somatic case (both show up sufficiently)
    var supportByStrandDirectionBias20 = new int[] { (int)(fwdCov * testVariantFreqA), (int)(revCov * testVariantFreqB), 0 };

    somatic = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias20, 20, 0.01, 0.5, StrandBiasModel.Extended);
    diploid = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias20, 20, 0.20, 0.5, StrandBiasModel.Diploid);

    Assert.Equal(0.0, somatic.BiasScore);
    Assert.Equal(double.NegativeInfinity, somatic.GATKBiasScore);
    Assert.True(somatic.BiasAcceptable);

    Assert.Equal(74.3, Math.Log10(diploid.BiasScore), 1); // a great big bias
    Assert.Equal(743.5, diploid.GATKBiasScore, 1);
    Assert.False(diploid.BiasAcceptable);

    // bias if you are looking for even a 1% variant or a 20% variant
    var supportByStrandDirectionBias01 = new int[] { (int)(fwdCov * testVariantFreqC), (int)(revCov * testVariantFreqD), 0 };

    somatic = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias01, 20, 0.01, 0.5, StrandBiasModel.Extended);
    diploid = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias01, 20, 0.20, 0.5, StrandBiasModel.Diploid);

    // Both configurations are expected to report a bias score of 1 (to 3 decimals) and reject.
    Assert.Equal(1.000, somatic.BiasScore, 3);
    Assert.Equal(0.002, somatic.GATKBiasScore, 3);
    Assert.False(somatic.BiasAcceptable);

    Assert.Equal(1.000, diploid.BiasScore, 3);
    Assert.Equal(0.000, diploid.GATKBiasScore, 3);
    Assert.False(diploid.BiasAcceptable);
}
/// <summary>
/// Exercises <c>StrandBiasCalculator.PopulateDiploidStats</c> over a range of
/// (observations, coverage) inputs with a 0.01 noise frequency and a 0.20 diploid
/// threshold, checking ChanceFalseNeg, ChanceFalsePos and ChanceVarFreqGreaterThanZero
/// to three decimal places.
/// </summary>
public void TestPopulateDiploidStats()
{
    double noiseFreq = 0.01;
    double diploidThreshold = 0.20;

    // Populates the stats and checks the three probabilities (expected value first, so a
    // failure message reports expected/actual the right way round).
    //   chanceFalseNeg: chance this is a real variant (a false negative if we filtered it)
    //   chanceFalsePos: chance this is due to noise (a false positive if we left it in)
    Action<StrandBiasStats, double, double, double> verify =
        (stats, chanceFalseNeg, chanceFalsePos, chanceVarFreqGreaterThanZero) =>
        {
            StrandBiasCalculator.PopulateDiploidStats(stats, noiseFreq, diploidThreshold);
            Assert.Equal(chanceFalseNeg, stats.ChanceFalseNeg, 3);
            Assert.Equal(chanceFalsePos, stats.ChanceFalsePos, 3);
            Assert.Equal(chanceVarFreqGreaterThanZero, stats.ChanceVarFreqGreaterThanZero, 3);
        };

    // StrandBiasStats arguments are (#observations, coverage).

    // Cases where the variant obviously exists
    verify(new StrandBiasStats(100, 100), 1, 0, 1);
    verify(new StrandBiasStats(50, 100), 1, 0, 1);
    verify(new StrandBiasStats(20, 100), 1, 0, 1);

    // Cases where the variant becomes less obvious
    // 15/100: it could happen that this is still real; noise is not very likely
    verify(new StrandBiasStats(15, 100), 0.129, 0.049, 0.129);
    // 10/100: unlikely to be a real variant at this threshold; noise is now the more probable explanation
    verify(new StrandBiasStats(10, 100), 0.006, 0.417, 0.006);
    // 1/100: expected to be attributed entirely to noise
    verify(new StrandBiasStats(1, 100), 0, 1, 0);

    // a few pathological cases
    verify(new StrandBiasStats(0, 100), 0, 1, 0);
    verify(new StrandBiasStats(10, 0), 1, 0, 1);
    verify(new StrandBiasStats(0, 0), 1, 0, 1); // not a meaningful answer, but at least nothing explodes.
    verify(new StrandBiasStats(101, 100), 1, 0, 1);

    // check it reacts properly to depth. Ie, a 15% variant in N of 20 isn't a big deal,
    // but a 15% variant in N of 100000 seems rather low.

    // note, the believability of this variant goes up from 0.129,
    // but it's also more possible to be noise. Basically, the whole picture is more murky
    verify(new StrandBiasStats((20.0 * 0.15), 20), 0.411, 0.143, 0.411);

    verify(new StrandBiasStats(15, 100), 0.129, 0.049, 0.129);

    // slightly more likely to be a variant than noise, but neither hypothesis fits.
    verify(new StrandBiasStats((500.0 * 0.15), 500), 0.002, 0, 0.002);

    // it doesn't look like noise or a variant. no hypothesis is reasonable.
    verify(new StrandBiasStats((100000.0 * 0.15), 100000), 0, 0, 0);
}
/// <summary>
/// Writes one deletion allele with populated strand bias results through
/// <c>StrandBiasFileWriter</c>, reads the file back, and checks that the header/data
/// columns carry the expected values. Also checks that writing after the writer has been
/// disposed throws, and that disposing a second time does not.
/// </summary>
public void Write()
{
    var outputFile = Path.Combine(UnitTestPaths.TestDataDirectory, "StrandBiasWriterTests.txt");
    File.Delete(outputFile);

    var chromosome = "chr1";
    var reference = "TTT";
    var alternate = "T";
    var position = 123;

    var variant = new CalledAllele(AlleleCategory.Deletion)
    {
        Chromosome = chromosome,
        Reference = reference,
        Alternate = alternate,
        Coordinate = position,
        StrandBiasResults = new StrandBiasResults()
        {
            BiasAcceptable = true,
            BiasScore = 1,
            CovPresentOnBothStrands = true,
            ForwardStats = StrandBiasCalculator.CreateStats(10, 100, .1, .1, StrandBiasModel.Poisson),
            GATKBiasScore = .2,
            OverallStats = StrandBiasCalculator.CreateStats(20, 200, .2, .2, StrandBiasModel.Poisson),
            ReverseStats = StrandBiasCalculator.CreateStats(30, 300, .3, .3, StrandBiasModel.Poisson),
            StitchedStats = StrandBiasCalculator.CreateStats(40, 400, .4, .4, StrandBiasModel.Poisson),
            TestAcceptable = true,
            TestScore = .5,
            VarPresentOnBothStrands = true,
        }
    };

    var calledAlleles = new List<CalledAllele>();
    calledAlleles.Add(variant);

    var writer = new StrandBiasFileWriter(outputFile);
    try
    {
        writer.WriteHeader();
        writer.Write(calledAlleles);
    }
    finally
    {
        // Dispose even when a write throws, so the writer is not left undisposed.
        writer.Dispose();
    }

    var biasFileContents = File.ReadAllLines(outputFile);

    // One header line plus one data line; Assert.Equal reports the actual count on failure.
    Assert.Equal(2, biasFileContents.Length);

    var header = biasFileContents[0].Split('\t');
    var data = biasFileContents[1].Split('\t');
    var dict = header.Select((a, i) => new { key = a, data = data[i] })
                     .ToDictionary(b => b.key, c => c.data);

    // Make sure well-formed and populated with the right data
    Assert.Equal(chromosome, dict["Chr"]);
    Assert.Equal(position.ToString(), dict["Position"]);
    Assert.Equal(reference, dict["Reference"]);
    Assert.Equal(alternate, dict["Alternate"]);

    var results = variant.StrandBiasResults;
    var statsByPrefix = new[]
    {
        new { Prefix = "Overall", Stats = results.OverallStats },
        new { Prefix = "Forward", Stats = results.ForwardStats },
        new { Prefix = "Reverse", Stats = results.ReverseStats },
    };

    // The same five columns are checked for each of the Overall/Forward/Reverse groups.
    foreach (var entry in statsByPrefix)
    {
        Assert.Equal(entry.Stats.ChanceFalsePos.ToString(), dict[entry.Prefix + "_ChanceFalsePos"]);
        Assert.Equal(entry.Stats.ChanceFalseNeg.ToString(), dict[entry.Prefix + "_ChanceFalseNeg"]);
        Assert.Equal(entry.Stats.Frequency.ToString(), dict[entry.Prefix + "_Freq"]);
        Assert.Equal(entry.Stats.Support.ToString(), dict[entry.Prefix + "_Support"]);
        Assert.Equal(entry.Stats.Coverage.ToString(), dict[entry.Prefix + "_Coverage"]);
    }

    Assert.Equal(results.BiasAcceptable.ToString(), dict["BiasAcceptable?"]);
    Assert.Equal(results.VarPresentOnBothStrands.ToString(), dict["VarPresentOnBothStrands?"]);
    Assert.Equal(results.CovPresentOnBothStrands.ToString(), dict["CoverageAvailableOnBothStrands?"]);

    //TODO RawCoverage/RawSupport tests

    // The writer was disposed above: further writes must throw, and a second Dispose must not.
    Assert.Throws<Exception>(() => writer.WriteHeader());
    Assert.Throws<Exception>(() => writer.Write(calledAlleles));
    writer.Dispose();
}