/// <summary>
/// Scores how unevenly the alternate allele is supported by two probe pools and, where the
/// comparison case calls for it, adds <c>FilterType.PoolBias</c> to the consensus allele's filters.
/// </summary>
/// <param name="Case">How the calls from the two pools compare.</param>
/// <param name="Consensus">Consensus allele; its NoiseLevelApplied is read and its Filters may be appended to.</param>
/// <param name="ProbePoolBiasThreshold">Acceptance threshold handed to the strand-bias calculator.</param>
/// <param name="variantCallingParameters">Supplies MinimumFrequency for the bias calculation.</param>
/// <param name="AltCountA">Alternate-allele support in pool A.</param>
/// <param name="AltCountB">Alternate-allele support in pool B.</param>
/// <param name="DepthA">Coverage in pool A.</param>
/// <param name="DepthB">Coverage in pool B.</param>
/// <param name="Genotype">Not used by this method.</param>
/// <param name="AltChangeToRef">When true, the result is reported as unbiased regardless of <paramref name="Case"/>.</param>
/// <returns>A new BiasResults with only GATKBiasScore and BiasScore filled in.</returns>
private static BiasResults GetProbePoolBiasScore(VariantComparisonCase Case, CalledAllele Consensus, float ProbePoolBiasThreshold, VariantCallingParameters variantCallingParameters, int AltCountA, int AltCountB, int DepthA, int DepthB, Genotype Genotype, bool AltChangeToRef)
{
    // Values reported when there is no bias.
    const double unbiasedPScore = 0;
    const double unbiasedGatkScore = -100;

    int noiseLevel = Consensus.NoiseLevelApplied;
    var poolBias = new BiasResults();

    bool treatAsReference = AltChangeToRef || (Case == VariantComparisonCase.AgreedOnReference);
    if (treatAsReference)
    {
        poolBias.GATKBiasScore = unbiasedGatkScore;
        poolBias.BiasScore = unbiasedPScore;
        return poolBias;
    }

    bool poolsDisagree = (Case == VariantComparisonCase.OneReferenceOneAlternate)
                         || (Case == VariantComparisonCase.CanNotCombine);
    if (poolsDisagree)
    {
        // The pools could not be reconciled: always flag the call and report the maximum scores.
        Consensus.Filters.Add(FilterType.PoolBias);
        poolBias.GATKBiasScore = 0;
        poolBias.BiasScore = 1;
        return poolBias;
    }

    double gatkScore = unbiasedGatkScore;
    double pScore = unbiasedPScore;

    if (Case == VariantComparisonCase.AgreedOnAlternate)
    {
        // Reuse the strand-bias machinery with the two pools in the first two slots;
        // the third entry is always 0.
        int[] coverageByPool = { DepthA, DepthB, 0 };
        int[] supportByPool = { AltCountA, AltCountB, 0 };

        BiasResults calculated = StrandBiasCalculator.CalculateStrandBiasResults(
            coverageByPool, supportByPool,
            noiseLevel, variantCallingParameters.MinimumFrequency, ProbePoolBiasThreshold,
            StrandBiasModel.Extended);

        // Clamp the GATK-scale score into [-100, 0] and cap the p-score at 1.
        gatkScore = Math.Max(-100, Math.Min(0, calculated.GATKBiasScore));
        pScore = Math.Min(1, calculated.BiasScore);

        if (!calculated.BiasAcceptable)
        {
            Consensus.Filters.Add(FilterType.PoolBias);
        }
    }

    poolBias.GATKBiasScore = gatkScore;
    poolBias.BiasScore = pScore;
    return poolBias;
}
// Runs the Poisson strand-bias model on a site whose support is heavily skewed
// towards the forward direction (54 vs 11 reads) and checks that it is scored
// with BiasScore 1 and GATKBiasScore 0.
public void TestSBCalculationsForForcedVariants()
{
    var coverageByStrandDirection = new int[] { 70038, 65998, 0 }; // forward, reverse, stitched
    var supportByStrandDirection = new int[] { 54, 11, 0 };

    BiasResults strandBias = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirection,
        20, 0.01, 0.5, StrandBiasModel.Poisson);

    // xUnit's signature is Assert.Equal(expected, actual); keeping the literal first
    // makes a failure message report the right value as "Expected".
    Assert.Equal(1.0, strandBias.BiasScore);
    Assert.Equal(0.0, strandBias.GATKBiasScore);
}
/// <summary>
/// Creates an empty allele: a homozygous-reference call of category Reference, with no
/// filters, an empty BiasResults, and zero-filled per-direction and read-collapsed count arrays.
/// </summary>
public CalledAllele()
{
    // Call classification.
    Type = AlleleCategory.Reference;
    Genotype = Genotype.HomozygousRef;

    // Filters and strand-bias results start out empty.
    Filters = new List<FilterType>();
    StrandBiasResults = new BiasResults();

    // One slot per direction type.
    int directionCount = Constants.NumDirectionTypes;
    EstimatedCoverageByDirection = new int[directionCount];
    SupportByDirection = new int[directionCount];
    WellAnchoredSupportByDirection = new int[directionCount];

    // One slot per read-collapsed type.
    int collapsedTypeCount = Constants.NumReadCollapsedTypes;
    ReadCollapsedCountsMut = new int[collapsedTypeCount];
    ReadCollapsedCountTotal = new int[collapsedTypeCount];
}
/// <summary>
/// Assigns a strand-bias score to a variant from its per-direction support and coverage counts.
/// Stitched counts are split between the forward and reverse directions using integer halving.
/// </summary>
/// <param name="coverageByStrandDirection">Coverage counts indexed by DirectionType.</param>
/// <param name="supportByStrandDirection">Variant support counts indexed by DirectionType.</param>
/// <param name="qNoise">Noise level as a Q-score; converted to an error rate of 10^(-qNoise/10).</param>
/// <param name="minVariantFreq">Minimum variant frequency passed on to the per-direction statistics.</param>
/// <param name="acceptanceCriteria">The bias is acceptable when the final BiasScore is strictly below this value.</param>
/// <param name="strandBiasModel">Model passed on to the per-direction statistics.</param>
/// <returns>
/// A BiasResults holding the overall, forward, reverse and stitched stats, the two bias scores,
/// the both-strand presence flags and the acceptability verdict.
/// </returns>
public static BiasResults CalculateStrandBiasResults(int[] coverageByStrandDirection, int[] supportByStrandDirection, int qNoise, double minVariantFreq, double acceptanceCriteria, StrandBiasModel strandBiasModel)
{
    int forwardIndex = (int)DirectionType.Forward;
    int reverseIndex = (int)DirectionType.Reverse;
    int stitchedIndex = (int)DirectionType.Stitched;

    int supportFwd = supportByStrandDirection[forwardIndex];
    int coverageFwd = coverageByStrandDirection[forwardIndex];
    int supportRev = supportByStrandDirection[reverseIndex];
    int coverageRev = coverageByStrandDirection[reverseIndex];
    int supportStitched = supportByStrandDirection[stitchedIndex];
    int coverageStitched = coverageByStrandDirection[stitchedIndex];

    // Integer halves of the stitched counts; each direction is credited with the same half.
    int halfStitchedSupport = supportStitched / 2;
    int halfStitchedCoverage = coverageStitched / 2;

    double errorRate = Math.Pow(10, -qNoise / 10f);

    var overall = CreateStats(
        supportFwd + supportRev + supportStitched,
        coverageFwd + coverageRev + coverageStitched,
        errorRate, minVariantFreq, strandBiasModel);
    var forward = CreateStats(
        supportFwd + halfStitchedSupport,
        coverageFwd + halfStitchedCoverage,
        errorRate, minVariantFreq, strandBiasModel);
    var reverse = CreateStats(
        supportRev + halfStitchedSupport,
        coverageRev + halfStitchedCoverage,
        errorRate, minVariantFreq, strandBiasModel);

    var results = new BiasResults();
    results.ForwardStats = forward;
    results.ReverseStats = reverse;
    results.OverallStats = overall;
    results.StitchedStats = CreateStats(supportStitched, coverageStitched, errorRate, minVariantFreq, strandBiasModel);

    // Element 0 is the bias score, element 1 the GATK-scale score.
    var scores = AssignBiasScore(overall, forward, reverse);
    results.BiasScore = scores[0];
    results.GATKBiasScore = scores[1];

    results.CovPresentOnBothStrands = (forward.Coverage > 0) && (reverse.Coverage > 0);
    results.VarPresentOnBothStrands = (forward.Support > 0) && (reverse.Support > 0);

    // It is not fair to call a variant biased when coverage exists in only one direction:
    // a variant seen in a single direction is then merely ambiguous, so report "no bias".
    if (!results.CovPresentOnBothStrands)
    {
        results.BiasScore = 0;
        results.GATKBiasScore = double.NegativeInfinity;
    }

    results.BiasAcceptable = results.BiasScore < acceptanceCriteria;
    return results;
}
/// <summary>
/// Copy constructor. Scalar properties are copied by value; the filter list, the per-direction
/// arrays, the read-collapsed arrays and the strand-bias results are duplicated so that the new
/// allele does not share those objects with <paramref name="originalAllele"/>.
/// </summary>
/// <param name="originalAllele">The allele to copy; must not be null.</param>
public CalledAllele(CalledAllele originalAllele)
{
    // NOTE(review): IsForcedToReport is not copied by this constructor - confirm that is intentional.
    CalledAllele source = originalAllele;

    // Location and alleles.
    Chromosome = source.Chromosome;
    ReferencePosition = source.ReferencePosition;
    ReferenceAllele = source.ReferenceAllele;
    AlternateAllele = source.AlternateAllele;

    // Call and quality values.
    Genotype = source.Genotype;
    GenotypeQscore = source.GenotypeQscore;
    VariantQscore = source.VariantQscore;
    NumNoCalls = source.NumNoCalls;
    NoiseLevelApplied = source.NoiseLevelApplied;
    Type = source.Type;

    // Coverage and support totals.
    TotalCoverage = source.TotalCoverage;
    AlleleSupport = source.AlleleSupport;
    WellAnchoredSupport = source.WellAnchoredSupport;
    ReferenceSupport = source.ReferenceSupport;

    // Coverage weighting and coverage window boundaries.
    UnanchoredCoverageWeight = source.UnanchoredCoverageWeight;
    ConfidentCoverageStart = source.ConfidentCoverageStart;
    ConfidentCoverageEnd = source.ConfidentCoverageEnd;
    SuspiciousCoverageStart = source.SuspiciousCoverageStart;
    SuspiciousCoverageEnd = source.SuspiciousCoverageEnd;

    StrandBiasResults = BiasResults.DeepCopy(source.StrandBiasResults);

    // Filters: new list, same entries in the same order.
    Filters = new List<FilterType>();
    foreach (FilterType appliedFilter in source.Filters)
    {
        Filters.Add(appliedFilter);
    }

    // Per-direction counts.
    SupportByDirection = new int[Constants.NumDirectionTypes];
    WellAnchoredSupportByDirection = new int[Constants.NumDirectionTypes];
    EstimatedCoverageByDirection = new int[Constants.NumDirectionTypes];
    for (int direction = 0; direction < Constants.NumDirectionTypes; direction++)
    {
        SupportByDirection[direction] = source.SupportByDirection[direction];
        WellAnchoredSupportByDirection[direction] = source.WellAnchoredSupportByDirection[direction];
        EstimatedCoverageByDirection[direction] = source.EstimatedCoverageByDirection[direction];
    }

    // Read-collapsed counts.
    ReadCollapsedCountsMut = new int[Constants.NumReadCollapsedTypes];
    ReadCollapsedCountTotal = new int[Constants.NumReadCollapsedTypes];
    for (int collapsedType = 0; collapsedType < Constants.NumReadCollapsedTypes; collapsedType++)
    {
        ReadCollapsedCountsMut[collapsedType] = source.ReadCollapsedCountsMut[collapsedType];
        ReadCollapsedCountTotal[collapsedType] = source.ReadCollapsedCountTotal[collapsedType];
    }
}
/// <summary>
/// Combines the strand-bias results of two calls of the same variant.
/// When only one call is present, that call's own StrandBiasResults instance is returned.
/// When both are present, a new BiasResults is returned carrying only the larger of the
/// two GATKBiasScore values.
/// </summary>
/// <param name="VariantA">First call, or null if absent.</param>
/// <param name="VariantB">Second call, or null if absent.</param>
/// <param name="SampleAggregationOptions">Not used by this method.</param>
/// <returns>The combined strand-bias results.</returns>
/// <exception cref="ArgumentNullException">Both variants are null, so there is nothing to combine.</exception>
private static BiasResults GetCombinedSBValue(CalledAllele VariantA, CalledAllele VariantB, SampleAggregationParameters SampleAggregationOptions)
{
    // Fail with a descriptive error instead of dereferencing null when neither call exists.
    if ((VariantA == null) && (VariantB == null))
    {
        throw new ArgumentNullException("VariantA", "At least one of VariantA and VariantB must be non-null to combine strand bias results.");
    }

    if (VariantA == null)
    {
        return VariantB.StrandBiasResults;
    }

    if (VariantB == null)
    {
        return VariantA.StrandBiasResults;
    }

    // Only allocate a result when both calls contribute to it.
    var combined = new BiasResults();
    combined.GATKBiasScore = Math.Max(
        VariantA.StrandBiasResults.GATKBiasScore,
        VariantB.StrandBiasResults.GATKBiasScore);
    return combined;
}
/// <summary>
/// Renders a BiasResults as a single tab-delimited string: first the text produced by
/// StringHelper for the overall, forward and reverse stats, then the raw coverage and support
/// counts (forward, reverse, stitched), and finally the bias score and the three boolean flags.
/// Every appended value is followed by a tab, including the last one.
/// </summary>
/// <param name="stat">The results to render.</param>
/// <returns>The tab-delimited text.</returns>
public static string StatsToString(BiasResults stat)
{
    const string delimiter = "\t";

    var statsData = new string[3 * 5];
    StringHelper(stat.OverallStats, statsData, 0, 3, delimiter);
    StringHelper(stat.ForwardStats, statsData, 1, 3, delimiter);
    StringHelper(stat.ReverseStats, statsData, 2, 3, delimiter);

    var text = new StringBuilder();
    for (int i = 0; i < statsData.Length; i++)
    {
        text.Append(statsData[i]);
    }

    // Raw data: coverage per direction...
    text.Append(stat.ForwardStats.Coverage).Append(delimiter);
    text.Append(stat.ReverseStats.Coverage).Append(delimiter);
    text.Append(stat.StitchedStats.Coverage).Append(delimiter);

    // ...then support per direction.
    text.Append(stat.ForwardStats.Support).Append(delimiter);
    text.Append(stat.ReverseStats.Support).Append(delimiter);
    text.Append(stat.StitchedStats.Support).Append(delimiter);

    // Results.
    text.Append(stat.BiasScore).Append(delimiter);
    text.Append(stat.BiasAcceptable).Append(delimiter);
    text.Append(stat.VarPresentOnBothStrands).Append(delimiter);
    text.Append(stat.CovPresentOnBothStrands).Append(delimiter);

    return text.ToString();
}
/// <summary>
/// Returns an independent copy of a BiasResults: scores and flags are copied by value and each
/// of the four stats objects is duplicated through StrandBiasStats.DeepCopy.
/// </summary>
/// <param name="originalSBresults">The results to copy; may be null.</param>
/// <returns>The copy, or null when <paramref name="originalSBresults"/> is null.</returns>
public static BiasResults DeepCopy(BiasResults originalSBresults)
{
    if (originalSBresults == null)
    {
        return null;
    }

    BiasResults source = originalSBresults;
    var copy = new BiasResults();

    // Scores and flags.
    copy.BiasAcceptable = source.BiasAcceptable;
    copy.BiasScore = source.BiasScore;
    copy.GATKBiasScore = source.GATKBiasScore;
    copy.VarPresentOnBothStrands = source.VarPresentOnBothStrands;
    copy.CovPresentOnBothStrands = source.CovPresentOnBothStrands;
    copy.TestAcceptable = source.TestAcceptable;
    copy.TestScore = source.TestScore;

    // Per-direction statistics.
    copy.ForwardStats = StrandBiasStats.DeepCopy(source.ForwardStats);
    copy.OverallStats = StrandBiasStats.DeepCopy(source.OverallStats);
    copy.ReverseStats = StrandBiasStats.DeepCopy(source.ReverseStats);
    copy.StitchedStats = StrandBiasStats.DeepCopy(source.StitchedStats);

    return copy;
}
// Compares the Extended model at a 1% minimum frequency ("somatic") with the Diploid model
// at a 20% minimum frequency ("diploid") on three support patterns over 10000x coverage per
// strand: equal support, 5% vs 25% support, and 2% vs 0.5% support.
public void TestSBCalculationsForSomaticAndDiploidSettings()
{
    double fwdCov = 10000;
    double revCov = 10000;
    double testVariantFreqA = 0.05;
    double testVariantFreqB = 0.25;
    double testVariantFreqC = 0.020;
    double testVariantFreqD = 0.005;

    var coverageByStrandDirection = new int[] { (int)fwdCov, (int)revCov, 0 }; // forward, reverse, stitched
    var equalSupportByStrandDirectionB = new int[] { (int)(fwdCov * testVariantFreqB), (int)(revCov * testVariantFreqB), 0 };

    // Happy path, no bias.
    BiasResults sbSomatic = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, equalSupportByStrandDirectionB,
        20, 0.01, 0.5, StrandBiasModel.Extended);
    BiasResults sbDiploid = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, equalSupportByStrandDirectionB,
        20, 0.20, 0.5, StrandBiasModel.Diploid);

    // xUnit's signature is Assert.Equal(expected, actual): the expected literal goes first.
    Assert.Equal(0.0, sbSomatic.BiasScore);
    Assert.Equal(double.NegativeInfinity, sbSomatic.GATKBiasScore);
    Assert.True(sbSomatic.BiasAcceptable);
    Assert.Equal(0.0, sbDiploid.BiasScore);
    Assert.Equal(double.NegativeInfinity, sbDiploid.GATKBiasScore);
    Assert.True(sbDiploid.BiasAcceptable);

    // Bias if you are looking for a 20% variant (only one side is sufficient to call),
    // but not biased in the somatic case (both show up sufficiently).
    var supportByStrandDirectionBias20 = new int[] { (int)(fwdCov * testVariantFreqA), (int)(revCov * testVariantFreqB), 0 };

    sbSomatic = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias20,
        20, 0.01, 0.5, StrandBiasModel.Extended);
    sbDiploid = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias20,
        20, 0.20, 0.5, StrandBiasModel.Diploid);

    Assert.Equal(0.0, sbSomatic.BiasScore);
    Assert.Equal(double.NegativeInfinity, sbSomatic.GATKBiasScore);
    Assert.True(sbSomatic.BiasAcceptable);
    Assert.Equal(74.3, Math.Log10(sbDiploid.BiasScore), 1); // a great big bias
    Assert.Equal(743.5, sbDiploid.GATKBiasScore, 1);
    Assert.False(sbDiploid.BiasAcceptable);

    // Bias if you are looking for even a 1% variant or a 20% variant.
    var supportByStrandDirectionBias01 = new int[] { (int)(fwdCov * testVariantFreqC), (int)(revCov * testVariantFreqD), 0 };

    sbSomatic = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias01,
        20, 0.01, 0.5, StrandBiasModel.Extended);
    sbDiploid = StrandBiasCalculator.CalculateStrandBiasResults(
        coverageByStrandDirection, supportByStrandDirectionBias01,
        20, 0.20, 0.5, StrandBiasModel.Diploid);

    Assert.Equal(1.000, sbSomatic.BiasScore, 3);
    Assert.Equal(0.002, sbSomatic.GATKBiasScore, 3);
    Assert.False(sbSomatic.BiasAcceptable);
    Assert.Equal(1.000, sbDiploid.BiasScore, 3); // a great big bias
    Assert.Equal(0.000, sbDiploid.GATKBiasScore, 3);
    Assert.False(sbDiploid.BiasAcceptable);
}
/// <summary>
/// Populates a called allele object given an array of vcf columns.
/// Only the first sample's genotype fields are read.
/// </summary>
/// <param name="shouldTrimComplexAlleles">
/// When true, an allele whose Type is Unsupported after SetType is passed to
/// VcfVariantUtilities.TrimUnsupportedAlleleType.
/// </param>
/// <param name="cols">Columns of one vcf record. When null or empty the method returns without modifying <paramref name="allele"/>.</param>
/// <param name="allele">The allele object to fill in; must not be null.</param>
/// <param name="alleleIndex">Zero-based index into the comma-separated ALT column selecting the alternate allele to load.</param>
protected static void ConvertColumnsToVariant(bool shouldTrimComplexAlleles, string[] cols, CalledAllele allele, int alleleIndex)
{
    bool shouldOutputRcCounts = true;
    bool shouldOutputTsCounts = true;

    if ((cols == null) || (cols.Length == 0))
    {
        // Nothing to parse. The caller's allele is left untouched: assigning null to the
        // by-value parameter would not be visible to the caller anyway.
        return;
    }

    // Vcf decimals always use '.', so floating-point fields must not be parsed with the
    // current culture (which may treat '.' as a group separator).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var floatStyle = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    //set defaults.
    var genotypeQscore = 0;
    var referenceSupport = 0;
    var altSupport = 0;
    var genotypeString = "";
    var totalCoverage = 0;
    var variantQuality = 0.0;
    var noiseLevel = 0;
    var fractionNocalls = 0f;
    var strandBiasInGATKScaleCoords = -100f;
    var tsCounts = new List<string>();

    //read in simple data
    allele.Chromosome = cols[VcfCommon.ChromIndex];
    allele.ReferencePosition = int.Parse(cols[VcfCommon.PosIndex]);
    allele.ReferenceAllele = cols[VcfCommon.RefIndex];
    allele.Filters = VcfVariantUtilities.MapFilterString(cols[VcfCommon.FilterIndex]);

    // CFTR uses a ".", which is not actually legal... (actually, vcf 4.1 does allow the
    // missing value "." here. Strelka uses it). In that case VariantQscore is left as it was.
    bool gotQual = double.TryParse(cols[VcfCommon.QualIndex], floatStyle, invariant, out variantQuality);
    if (gotQual)
    {
        allele.VariantQscore = (int)variantQuality;
    }

    // parse the variant alleles
    var variantAlleles = cols[VcfCommon.AltIndex].Split(',');
    allele.AlternateAllele = variantAlleles[alleleIndex];
    var isRef = (allele.AlternateAllele == ".");
    int numAlts = isRef ? 0 : variantAlleles.Length;

    // parse the info field data (presume, single sample)
    Dictionary<string, string> InfoFields = ParseInfoFields(cols);

    // parse the genotype data (presume, single sample)
    List<Dictionary<string, string>> Genotypes = ParseGenotypeData(cols);

    //get more complex allele data...
    string fieldValue;

    if (InfoFields.TryGetValue("DP", out fieldValue))
    {
        totalCoverage = Int32.Parse(fieldValue);
    }

    if ((Genotypes.Count > 0) && (Genotypes[0] != null))
    {
        Dictionary<string, string> sample = Genotypes[0];

        if (sample.TryGetValue("GQ", out fieldValue))
        {
            genotypeQscore = Int32.Parse(fieldValue);
        }
        else if (sample.TryGetValue("GQX", out fieldValue))
        {
            genotypeQscore = Int32.Parse(fieldValue);
        }

        if (sample.TryGetValue("GT", out fieldValue))
        {
            genotypeString = fieldValue;
        }

        if (sample.TryGetValue("NL", out fieldValue))
        {
            noiseLevel = Int32.Parse(fieldValue);
        }

        if (sample.TryGetValue("NC", out fieldValue))
        {
            fractionNocalls = float.Parse(fieldValue, floatStyle, invariant);
        }

        if (sample.TryGetValue("SB", out fieldValue))
        {
            strandBiasInGATKScaleCoords = float.Parse(fieldValue, floatStyle, invariant);
        }

        var ADstrings = new string[] { "0", "0" };
        bool hasAD = sample.TryGetValue("AD", out fieldValue);
        if (hasAD)
        {
            ADstrings = fieldValue.Split(',');
        }

        referenceSupport = int.Parse(ADstrings[0]);

        //by default alt support is 0.
        if ((!isRef) && (ADstrings.Length > 1))
        {
            // When AD carries more entries than there are alts it is laid out as
            // ref,alt1,alt2,... so this allele's count sits at alleleIndex + 1.
            // Otherwise fall back to the second entry.
            bool refCountIncluded = ADstrings.Length > numAlts;
            int adIndex = (refCountIncluded && (alleleIndex + 1 < ADstrings.Length)) ? alleleIndex + 1 : 1;
            altSupport = int.Parse(ADstrings[adIndex]);
        }

        if (shouldOutputRcCounts)
        {
            if (sample.TryGetValue("US", out fieldValue))
            {
                tsCounts = fieldValue.Split(',').ToList();
            }
        }

        allele.Genotype = VcfVariantUtilities.MapGTString(genotypeString, numAlts);

        //note this awkward vcf line (pisces)
        //"chr4\t10\t.\tAA\tGA,G\t0\tPASS\tDP=5394\tGT:GQ:AD:DP:VF:NL:SB:NC\t1/2:0:2387,2000:5394:0.8133:23:0.0000:0.0000";
        //and this one
        //chr2 19946216.ATGTGTG ATG,ATGTG,A 0 PASS metal = platinum; cgi =.; bwa_freebayes = HD:0,LOOHD: 0; bwa_platypus =.; bwa_gatk3 = HD:2,LOOHD: 2; cortex =.; isaac2 = HD:1,LOOHD: 1; dist2closest = 192 GT 1 | 2
        if ((numAlts >= 2) && hasAD)
        {
            //in this case the ref support was never expressly given (AD holds alt counts only),
            //so we have to derive it from the total coverage.
            if (ADstrings.Length <= numAlts)
            {
                int totalAltCount = 0;

                for (int altIndex = 0; altIndex < numAlts; altIndex++)
                {
                    var altSupportAtIndex = int.Parse(ADstrings[altIndex]);
                    totalAltCount += altSupportAtIndex;

                    if (altIndex == alleleIndex)
                    {
                        altSupport = altSupportAtIndex;
                    }
                }

                referenceSupport = Math.Max(0, totalCoverage - totalAltCount);
            }
        }
    }

    var strandBiasResults = new BiasResults();
    strandBiasResults.GATKBiasScore = strandBiasInGATKScaleCoords;

    //set the remaining data
    allele.TotalCoverage = totalCoverage;
    allele.AlleleSupport = isRef ? referenceSupport : altSupport;
    allele.ReferenceSupport = referenceSupport;
    allele.GenotypeQscore = genotypeQscore;
    allele.NoiseLevelApplied = noiseLevel;
    allele.StrandBiasResults = strandBiasResults;
    allele.IsForcedToReport = allele.Filters.Contains(FilterType.ForcedReport);

    //set the derived values
    allele.SetType();
    allele.ForceFractionNoCalls(fractionNocalls);

    //rescue attempt for complex types, ie ACGT -> ACGTGG.
    //Get the simplest form of the allele
    if ((allele.Type == AlleleCategory.Unsupported) && shouldTrimComplexAlleles)
    {
        VcfVariantUtilities.TrimUnsupportedAlleleType(allele);
    }

    if (tsCounts.Count != 0)
    {
        VcfVariantUtilities.FillInCollapsedReadsCount(shouldOutputRcCounts, shouldOutputTsCounts, allele, tsCounts);
    }
}
/// <summary>
/// Converts a single-allele ("unpacked") vcf variant into a CalledAllele.
/// Only the first sample's genotype fields are read.
/// </summary>
/// <param name="v">The variant to convert; may be null.</param>
/// <param name="shouldOutputRcCounts">When true, the "US" genotype field (if present) is read; also passed on to FillInCollapsedReadsCount.</param>
/// <param name="shouldOutputTsCounts">Passed on to FillInCollapsedReadsCount.</param>
/// <param name="shouldTrimComplexAlleles">When true, an allele whose Type is Unsupported after SetType is passed to TrimUnsupportedAlleleType.</param>
/// <returns>The populated allele, or null when <paramref name="v"/> is null.</returns>
/// <exception cref="ArgumentException">The variant lists more than one alternate allele.</exception>
public static CalledAllele ConvertUnpackedVariant(VcfVariant v, bool shouldOutputRcCounts = false, bool shouldOutputTsCounts = false, bool shouldTrimComplexAlleles = true)
{
    if (v == null)
    {
        return null;
    }

    if (v.VariantAlleles.Count() > 1)
    {
        throw new ArgumentException("This method does not handle crushed vcf format. Use Convert(IEnumerable<VcfVariant> vcfVariants)");
    }

    // Vcf decimals always use '.', so floating-point fields must not be parsed with the
    // current culture (which may treat '.' as a group separator).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var floatStyle = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    var genotypeQscore = 0;
    var referenceSupport = 0;
    var altSupport = 0;
    var genotypeString = "";
    var totalCoverage = 0;
    var isRef = ((v.VariantAlleles.Count() == 1) && v.VariantAlleles[0] == ".");
    var variantQuality = v.Quality;
    var numAlts = 1;
    var noiseLevel = 1;
    var fractionNocalls = 0f;
    var strandBiasInGATKScaleCoords = -100f;
    var tsCounts = new List<string>();

    string fieldValue;

    if (v.InfoFields.TryGetValue("DP", out fieldValue))
    {
        totalCoverage = Int32.Parse(fieldValue);
    }

    if (v.Genotypes.Count > 0)
    {
        var sample = v.Genotypes[0];

        if (sample.TryGetValue("GQ", out fieldValue))
        {
            genotypeQscore = Int32.Parse(fieldValue);
        }
        else if (sample.TryGetValue("GQX", out fieldValue))
        {
            genotypeQscore = Int32.Parse(fieldValue);
        }

        // A sample without GT keeps the empty genotype string instead of throwing
        // KeyNotFoundException, matching how the other optional fields are handled.
        if (sample.TryGetValue("GT", out fieldValue))
        {
            genotypeString = fieldValue;
        }

        if (sample.TryGetValue("NL", out fieldValue))
        {
            noiseLevel = Int32.Parse(fieldValue);
        }

        if (sample.TryGetValue("NC", out fieldValue))
        {
            fractionNocalls = float.Parse(fieldValue, floatStyle, invariant);
        }

        if (sample.TryGetValue("SB", out fieldValue))
        {
            strandBiasInGATKScaleCoords = float.Parse(fieldValue, floatStyle, invariant);
        }

        var ADstring = new string[] { "0", "0" };
        if (sample.TryGetValue("AD", out fieldValue))
        {
            ADstring = fieldValue.Split(',');
        }

        referenceSupport = int.Parse(ADstring[0]);

        // Alt support stays 0 for reference calls and when AD carries a single value.
        altSupport = (isRef || ADstring.Length < 2) ? 0 : int.Parse(ADstring[1]);

        if (shouldOutputRcCounts)
        {
            if (sample.TryGetValue("US", out fieldValue))
            {
                tsCounts = fieldValue.Split(',').ToList();
            }
        }

        // Note: this method should never get a value > 1 here; these should be UNPACKED variants.
        numAlts = isRef ? 0 : 1;
    }

    var strandBiasResults = new BiasResults();
    strandBiasResults.GATKBiasScore = strandBiasInGATKScaleCoords;

    var filters = MapFilterString(v.Filters);

    var allele = new CalledAllele()
    {
        Chromosome = v.ReferenceName,
        ReferencePosition = v.ReferencePosition,
        ReferenceAllele = v.ReferenceAllele,
        AlternateAllele = v.VariantAlleles[0],
        TotalCoverage = totalCoverage,
        AlleleSupport = isRef ? referenceSupport : altSupport,
        ReferenceSupport = referenceSupport,
        VariantQscore = (int)variantQuality,
        GenotypeQscore = genotypeQscore,
        Genotype = MapGTString(genotypeString, numAlts),
        Filters = filters,
        NoiseLevelApplied = noiseLevel,
        StrandBiasResults = strandBiasResults,
        IsForcedToReport = filters.Contains(FilterType.ForcedReport)
    };

    allele.SetType();
    allele.ForceFractionNoCalls(fractionNocalls);

    //rescue attempt for complex types, ie ACGT -> ACGTGG
    if ((allele.Type == AlleleCategory.Unsupported) && shouldTrimComplexAlleles)
    {
        TrimUnsupportedAlleleType(allele);
    }

    FillInCollapsedReadsCount(shouldOutputRcCounts, shouldOutputTsCounts, allele, tsCounts);

    return allele;
}