/// <summary>
/// Combine the calls made for one allele in two pools into a single consensus allele.
/// Either input may be null (both are null-checked below); the case where both are
/// null is not guarded here.
/// We never try to combine two different variant alleles.
/// </summary>
/// <param name="VariantA">Call from the first pool, or null.</param>
/// <param name="VariantB">Call from the second pool, or null.</param>
/// <param name="ComparisonCase">How the two calls relate to each other; forwarded to the combine helpers.</param>
/// <returns>The aggregate allele built from both inputs.</returns>
public AggregateAllele CombineVariants(CalledAllele VariantA, CalledAllele VariantB, VariantComparisonCase ComparisonCase)
{
    SampleAggregationParameters SampleAggregationOptions = _options.SampleAggregationParameters;
    var Consensus = new AggregateAllele(new List<CalledAllele> { VariantA, VariantB });

    // (A) set the reference data.
    // This should be the same for both inputs. When both are present,
    // the values copied from VariantB are the ones that remain.
    if (VariantA != null)
    {
        DoDefensiveGenotyping(VariantA);
        Consensus.Chromosome = VariantA.Chromosome;
        Consensus.ReferencePosition = VariantA.ReferencePosition;
        Consensus.ReferenceAllele = VariantA.ReferenceAllele;
    }

    if (VariantB != null)
    {
        DoDefensiveGenotyping(VariantB);
        Consensus.Chromosome = VariantB.Chromosome;
        Consensus.ReferencePosition = VariantB.ReferencePosition;
        Consensus.ReferenceAllele = VariantB.ReferenceAllele;
    }

    // Normally the reference data is the same for both, no matter what the case.
    // But if we have one deletion and one not, we might have different ref alleles,
    // so the reference allele is re-derived from both inputs here.
    Consensus.ReferenceAllele = CombineReferenceAlleles(VariantA, VariantB, ComparisonCase);

    // (B) set the alternate data
    Consensus.AlternateAllele = CombineVariantAlleles(VariantA, VariantB, ComparisonCase);

    // (C) set filters, etc.
    Consensus.Filters = CombineFilters(VariantA, VariantB);

    // (D) set GT data, includes calculating the probe-pool bias and quality scores
    RecalculateScoring(VariantA, VariantB, ComparisonCase, Consensus, SampleAggregationOptions, _options.VariantCallingParams);

    return Consensus;
}
/// <summary>
/// Builds the VCF writer configuration by copying thresholds and reporting switches
/// out of the supplied parameter objects. Optional filters are stored as null when
/// their enabling condition is not met.
/// </summary>
/// <param name="callerOptions">Source of the filter thresholds, ploidy and noise models, and RMxN settings.</param>
/// <param name="outputOptions">Source of the reporting switches and the gVCF flag.</param>
/// <param name="bamFilterOptions">Not read by this constructor.</param>
/// <param name="sampleAggregationParameters">When non-null, probe-bias output is enabled and its threshold is copied.</param>
/// <param name="debugMode">Forwarded to ShouldOutputNoiseLevelAndStrandBias.</param>
/// <param name="outputBiasFiles">Forwarded to ShouldOutputNoiseLevelAndStrandBias.</param>
/// <param name="hasForcedGT">Stored as HasForcedGt.</param>
public VcfWriterConfig(VariantCallingParameters callerOptions, VcfWritingParameters outputOptions, BamFilterParameters bamFilterOptions, SampleAggregationParameters sampleAggregationParameters, bool debugMode, bool outputBiasFiles, bool hasForcedGT = false)
{
    // Depth filter: gVCF output always uses the minimum coverage; otherwise the
    // low-depth filter is only kept when it is stricter than the minimum coverage.
    if (outputOptions.OutputGvcfFile)
    {
        DepthFilterThreshold = callerOptions.MinimumCoverage;
    }
    else if (callerOptions.LowDepthFilter > callerOptions.MinimumCoverage)
    {
        DepthFilterThreshold = callerOptions.LowDepthFilter;
    }
    else
    {
        DepthFilterThreshold = null;
    }

    bool indelRepeatFilterEnabled = callerOptions.IndelRepeatFilter > 0;
    IndelRepeatFilterThreshold = indelRepeatFilterEnabled ? callerOptions.IndelRepeatFilter : (int?)null;

    VariantQualityFilterThreshold = callerOptions.MinimumVariantQScoreFilter;

    // The genotype-quality filter needs a configured value AND a variant Q-score
    // filter that is above the minimum variant Q-score.
    if (callerOptions.LowGenotypeQualityFilter.HasValue
        && callerOptions.MinimumVariantQScoreFilter > callerOptions.MinimumVariantQScore)
    {
        GenotypeQualityFilterThreshold = callerOptions.LowGenotypeQualityFilter;
    }
    else
    {
        GenotypeQualityFilterThreshold = null;
    }

    bool strandBiasFilterEnabled = callerOptions.StrandBiasAcceptanceCriteria < 1;
    StrandBiasFilterThreshold = strandBiasFilterEnabled ? callerOptions.StrandBiasAcceptanceCriteria : (float?)null;

    bool ampliconBiasFilterEnabled = callerOptions.AmpliconBiasFilterThreshold > 0;
    AmpliconBiasFilterThreshold = ampliconBiasFilterEnabled ? callerOptions.AmpliconBiasFilterThreshold : (float?)null;

    FrequencyFilterThreshold = GetMinFreqFilterForVcfHeader(callerOptions);
    MinFrequencyThreshold = callerOptions.MinimumFrequency;

    ShouldOutputNoCallFraction = outputOptions.ReportNoCalls;
    ShouldOutputStrandBiasAndNoiseLevel = ShouldOutputNoiseLevelAndStrandBias(
        debugMode,
        outputBiasFiles,
        callerOptions.StrandBiasAcceptanceCriteria);
    ShouldFilterOnlyOneStrandCoverage = callerOptions.FilterOutVariantsPresentOnlyOneStrand;
    EstimatedBaseCallQuality = callerOptions.NoiseLevelUsedForQScoring;
    ShouldOutputRcCounts = outputOptions.ReportRcCounts;
    ShouldOutputTsCounts = outputOptions.ReportTsCounts;
    AllowMultipleVcfLinesPerLoci = outputOptions.AllowMultipleVcfLinesPerLoci;
    PloidyModel = callerOptions.PloidyModel;

    RMxNFilterMaxLengthRepeat = callerOptions.RMxNFilterMaxLengthRepeat;
    RMxNFilterMinRepetitions = callerOptions.RMxNFilterMinRepetitions;
    RMxNFilterFrequencyLimit = callerOptions.RMxNFilterFrequencyLimit;

    NoiseModel = callerOptions.NoiseModel;
    ShouldReportGp = outputOptions.ReportGp;
    NoCallFilterThreshold = callerOptions.NoCallFilterThreshold;
    ShouldOutputSuspiciousCoverageFraction = outputOptions.ReportSuspiciousCoverageFraction;

    // Probe-pool bias is only reported when sample aggregation is configured.
    if (sampleAggregationParameters != null)
    {
        ShouldOutputProbeBias = true;
        ProbePoolBiasFilterThreshold = sampleAggregationParameters.ProbePoolBiasThreshold;
    }

    HasForcedGt = hasForcedGT;
}
/// <summary>
/// Combines a low-frequency SNV call (noise level 35) with a reference call
/// (noise level 2) at the same position and checks the consensus fields, in particular
/// how the noise level and variant Q-score are derived for each Q-score combination
/// method and when one of the two inputs is null.
/// </summary>
public void VennVcf_CombineTwoPoolVariants_Qscore_DiffentNL_Test()
{
    //chr3	41266161	.	A	G	30	PASS	DP=3067	GT:GQ:AD:VF:NL:SB	0/1:30:3005,54:0.0176:35:-100.0000
    CalledAllele VarA = new CalledAllele()
    {
        Chromosome = "chr3",
        ReferencePosition = 41266161,
        TotalCoverage = 3067,
        Genotype = Pisces.Domain.Types.Genotype.HeterozygousAltRef,
        VariantQscore = 30,
        GenotypeQscore = 30,
        AlleleSupport = 54,
        ReferenceSupport = 3005,
        NoiseLevelApplied = 35,
        StrandBiasResults = new Pisces.Domain.Models.BiasResults() { GATKBiasScore = -100.0000 },
        ReferenceAllele = "A",
        AlternateAllele = "G",
        Type = Pisces.Domain.Types.AlleleCategory.Snv
    };

    //chr3	41266161	.	A	.	75	PASS	DP=3795	GT:GQ:AD:VF:NL:SB	0/0:75:3780:0.0040:2:-100.0000
    CalledAllele VarB = new CalledAllele()
    {
        Chromosome = "chr3",
        ReferencePosition = 41266161,
        TotalCoverage = 3795,
        Genotype = Pisces.Domain.Types.Genotype.HomozygousRef,
        VariantQscore = 75,
        GenotypeQscore = 75,
        AlleleSupport = 3780,
        ReferenceSupport = 3780,
        NoiseLevelApplied = 2,
        StrandBiasResults = new Pisces.Domain.Models.BiasResults() { GATKBiasScore = -100.0000 },
        ReferenceAllele = "A",
        AlternateAllele = ".",
        Type = Pisces.Domain.Types.AlleleCategory.Reference
    };

    //old answer
    //chr3	41266161	.	A	.	100.00	PASS	DP=6862;cosmic=COSM1423020,COSM1423021;EVS=0|69.0|6503;phastCons	GT:GQ:AD:VF:NL:SB:PB:GQX	0/0:100:6785:0.0079:35:-100:-100.0000:100

    SampleAggregationParameters SampleAggregationOptions = new SampleAggregationParameters();
    SampleAggregationOptions.ProbePoolBiasThreshold = 0.5f;
    SampleAggregationOptions.HowToCombineQScore = SampleAggregationParameters.CombineQScoreMethod.CombinePoolsAndReCalculate;

    _basicOptions.BamFilterParams.MinimumBaseCallQuality = 20;
    _basicOptions.VariantCallingParams.MinimumFrequency = 0.01f;
    _basicOptions.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    _basicOptions.SampleAggregationParameters = SampleAggregationOptions;

    string consensusOut = Path.Combine(_TestDataPath, "ConsensusOut.vcf");

    VariantComparisonCase ComparisonCase = VennProcessor.GetComparisonCase(VarA, VarB);
    ConsensusBuilder consensusBuilder = new ConsensusBuilder(consensusOut, _basicOptions);
    AggregateAllele consensus = consensusBuilder.CombineVariants(VarA, VarB, ComparisonCase);

    // Check for null before the first dereference so a null result fails as an
    // assertion rather than as a NullReferenceException.
    Assert.NotNull(consensus);
    Console.WriteLine(consensus.ToString());

    double expectedNoiseLevel = MathOperations.PtoQ(
        (MathOperations.QtoP(35) + MathOperations.QtoP(2)) / (2.0)); // 5

    //GT:GQ:AD:VF:NL:SB:PB:GQX	0/0:100:6785:0.0079:35:-100:-100.0000:100
    Assert.Equal(100, consensus.VariantQscore);
    Assert.Equal("A", consensus.ReferenceAllele);
    Assert.Equal(".", consensus.AlternateAllele);
    Assert.Equal(Pisces.Domain.Types.Genotype.HomozygousRef, consensus.Genotype);
    Assert.Equal(6862, consensus.TotalCoverage);
    Assert.Equal(6785, consensus.ReferenceSupport);
    Assert.Equal(6785, consensus.AlleleSupport);
    Assert.Equal(100, consensus.GenotypeQscore);
    Assert.Equal(0.98877877f, consensus.Frequency);
    Assert.Equal(((int)expectedNoiseLevel), consensus.NoiseLevelApplied);
    Assert.Equal(5, consensus.NoiseLevelApplied);
    Assert.Equal(-100, consensus.StrandBiasResults.GATKBiasScore);
    Assert.Equal(-100.0000, consensus.PoolBiasResults.GATKBiasScore);

    // Now check that we take the min NL score if we are taking the min Q score.
    // (In this case of combined alt+ref -> ref, the Q score will still need to be
    // recalculated, just with the MIN NL.)
    SampleAggregationOptions.HowToCombineQScore = SampleAggregationParameters.CombineQScoreMethod.TakeMin;
    ComparisonCase = VennProcessor.GetComparisonCase(VarA, VarB);
    consensus = consensusBuilder.CombineVariants(VarA, VarB, ComparisonCase);
    Assert.Equal(2, consensus.NoiseLevelApplied);
    Assert.Equal(100, consensus.VariantQscore);

    // OK, now sanity check we don't barf if either input is null:
    ComparisonCase = VennProcessor.GetComparisonCase(VarA, null);
    consensus = consensusBuilder.CombineVariants(VarA, null, ComparisonCase);
    Assert.Equal(35, consensus.NoiseLevelApplied);
    Assert.Equal(100, consensus.VariantQscore);

    ComparisonCase = VennProcessor.GetComparisonCase(null, VarB);
    consensus = consensusBuilder.CombineVariants(null, VarB, ComparisonCase);
    Assert.Equal(2, consensus.NoiseLevelApplied);
    Assert.Equal(100, consensus.VariantQscore);

    // OK, let's check this again for the pool-and-recalculate option.
    // Sanity check we don't barf if either input is null:
    SampleAggregationOptions.HowToCombineQScore = SampleAggregationParameters.CombineQScoreMethod.CombinePoolsAndReCalculate;
    ComparisonCase = VennProcessor.GetComparisonCase(VarA, null);
    consensus = consensusBuilder.CombineVariants(VarA, null, ComparisonCase);
    Assert.Equal(35, consensus.NoiseLevelApplied);
    Assert.Equal(100, consensus.VariantQscore); // low freq variant -> nocall. Note, qscore would be 41 if NL = 20.

    ComparisonCase = VennProcessor.GetComparisonCase(null, VarB);
    consensus = consensusBuilder.CombineVariants(null, VarB, ComparisonCase);
    Assert.Equal(2, consensus.NoiseLevelApplied);
    Assert.Equal(100, consensus.VariantQscore); // solid ref
}
/// <summary>
/// Derives the consensus genotype for a pair of per-pool calls.
/// The genotype is first chosen from which allele types (ref / alt) are present in
/// either input, and is then downgraded when the combined evidence fails the
/// frequency or depth criteria.
/// </summary>
/// <param name="VariantA">Call from the first pool, or null.</param>
/// <param name="VariantB">Call from the second pool, or null.</param>
/// <param name="Case">How the two calls relate to each other.</param>
/// <param name="TotalDepth">Combined coverage of both pools.</param>
/// <param name="VarFrequency">Variant frequency over both pools combined.</param>
/// <param name="VarFrequencyA">Variant frequency in the first pool.</param>
/// <param name="VarFrequencyB">Variant frequency in the second pool.</param>
/// <param name="SampleAggregationOptions">Not read by this method.</param>
/// <param name="variantCallingParameters">Supplies MinimumFrequency, MinimumFrequencyFilter and MinimumCoverage.</param>
/// <returns>The genotype to report for the consensus allele.</returns>
private static Genotype GetGenotype(CalledAllele VariantA, CalledAllele VariantB, VariantComparisonCase Case, int TotalDepth, double VarFrequency, double VarFrequencyA, double VarFrequencyB, SampleAggregationParameters SampleAggregationOptions, VariantCallingParameters variantCallingParameters)
{
    // cases: {0/0 , 0/1, 1/1, ./.} , choose 2.
    bool RefPresent = ((VariantA != null && VariantA.HasARefAllele) || (VariantB != null && VariantB.HasARefAllele));
    bool AltPresent = ((VariantA != null && VariantA.HasAnAltAllele) || (VariantB != null && VariantB.HasAnAltAllele));

    // No alt and no reference detected: it is a no-call, and we are done.
    if (!AltPresent && !RefPresent)
    {
        return Genotype.RefLikeNoCall;
    }

    Genotype mergedGT;
    if (!AltPresent)
    {
        mergedGT = Genotype.HomozygousRef;
    }
    else if (RefPresent)
    {
        mergedGT = Genotype.HeterozygousAltRef;
    }
    else
    {
        // todo, expand to cover nocalls and heterozygous calls.
        mergedGT = Genotype.HomozygousAlt;
    }

    // If the merged GT implies a variant call, it has to pass some minimal criteria,
    // or it gets re-classified as a ref type or a no-call.
    if (Case != VariantComparisonCase.AgreedOnReference)
    {
        // Never call it a variant if the combined frequency is below the thresholds:
        //   combined freq < MinimumFrequency and both per-pool freqs < MinimumFrequencyFilter -> 0/0
        //   combined freq < MinimumFrequency and a per-pool freq >= MinimumFrequencyFilter     -> ./.
        //   MinimumFrequency <= combined freq < MinimumFrequencyFilter                          -> ./.
        if (VarFrequency < variantCallingParameters.MinimumFrequency)
        {
            bool lowInBothPools =
                (VarFrequencyA < variantCallingParameters.MinimumFrequencyFilter) &&
                (VarFrequencyB < variantCallingParameters.MinimumFrequencyFilter);

            mergedGT = lowInBothPools ? Genotype.HomozygousRef : Genotype.AltLikeNoCall;
        }
        else if (VarFrequency < variantCallingParameters.MinimumFrequencyFilter)
        {
            mergedGT = Genotype.AltLikeNoCall;
        }

        // next - we have to clean up any multiple allelic sites.
    }
    // Defensive programming: each pool should already have been called "./." for
    // individual low depth, so the combined result should already be "./." by default.
    // NOTE(review): because this is an `else if`, the depth check is only reached when
    // Case is AgreedOnReference. The original comment ("dont call it a variant *or* a
    // reference") suggests it may have been meant to apply to variant calls as well -
    // confirm before changing.
    else if (TotalDepth < variantCallingParameters.MinimumCoverage)
    {
        mergedGT = Genotype.RefLikeNoCall;
    }

    return mergedGT;
}
/// <summary>
/// Picks the noise level to report for the consensus allele.
/// With only one input present, that input's noise level is returned unchanged.
/// With both present, the result is either the smaller of the two (TakeMin) or the
/// value produced by CombineNoiseLevelsByTakingAvgP.
/// </summary>
/// <param name="VariantA">Call from the first pool, or null.</param>
/// <param name="VariantB">Call from the second pool, or null.</param>
/// <param name="SampleAggregationOptions">Supplies the Q-score combination method.</param>
/// <returns>The combined noise level.</returns>
private static int GetCombinedNLValue(CalledAllele VariantA, CalledAllele VariantB, SampleAggregationParameters SampleAggregationOptions)
{
    // Only one pool made a call: nothing to combine.
    if (VariantA == null)
    {
        return VariantB.NoiseLevelApplied;
    }

    if (VariantB == null)
    {
        return VariantA.NoiseLevelApplied;
    }

    bool takeMinimum =
        SampleAggregationOptions.HowToCombineQScore == SampleAggregationParameters.CombineQScoreMethod.TakeMin;

    return takeMinimum
        ? Math.Min(VariantA.NoiseLevelApplied, VariantB.NoiseLevelApplied)
        : CombineNoiseLevelsByTakingAvgP(VariantA.NoiseLevelApplied, VariantB.NoiseLevelApplied);
}
/// <summary>
/// Picks the strand-bias results to report for the consensus allele.
/// With only one input present, that input's StrandBiasResults object is returned
/// as-is. With both present, a new BiasResults is returned whose GATKBiasScore is
/// the larger of the two inputs' scores; no other field of the new object is set here.
/// </summary>
/// <param name="VariantA">Call from the first pool, or null.</param>
/// <param name="VariantB">Call from the second pool, or null.</param>
/// <param name="SampleAggregationOptions">Not read by this method.</param>
/// <returns>The strand-bias results for the consensus allele.</returns>
private static BiasResults GetCombinedSBValue(CalledAllele VariantA, CalledAllele VariantB, SampleAggregationParameters SampleAggregationOptions)
{
    // Only one pool made a call: pass its results straight through.
    if (VariantA == null)
    {
        return VariantB.StrandBiasResults;
    }

    if (VariantB == null)
    {
        return VariantA.StrandBiasResults;
    }

    return new BiasResults
    {
        GATKBiasScore = Math.Max(
            VariantA.StrandBiasResults.GATKBiasScore,
            VariantB.StrandBiasResults.GATKBiasScore)
    };
}
/// <summary>
/// Fills in the coverage, support counts, genotype, noise level, bias results and
/// Q-scores of <paramref name="ConsensusAllele"/> from the two per-pool calls.
/// </summary>
/// <param name="VariantA">Call from the first pool, or null (contributes zero counts).</param>
/// <param name="VariantB">Call from the second pool, or null (contributes zero counts).</param>
/// <param name="Case">How the two calls relate to each other.</param>
/// <param name="ConsensusAllele">The aggregate allele being populated; modified in place.</param>
/// <param name="SampleAggregationOptions">Supplies the Q-score combination method and the probe-pool bias threshold.</param>
/// <param name="variantCallingParameters">Supplies MaximumVariantQScore and is forwarded to the genotype and pool-bias helpers.</param>
private static void RecalculateScoring(CalledAllele VariantA, CalledAllele VariantB, VariantComparisonCase Case, AggregateAllele ConsensusAllele, SampleAggregationParameters SampleAggregationOptions, VariantCallingParameters variantCallingParameters)
{
    // 1) Gather the per-pool component values. A reference-type call contributes
    //    no alt support, whatever its AlleleSupport says.
    int refSupportA = 0, altSupportA = 0, coverageA = 0;
    int refSupportB = 0, altSupportB = 0, coverageB = 0;

    if (VariantA != null)
    {
        refSupportA = VariantA.ReferenceSupport;
        altSupportA = VariantA.IsRefType ? 0 : VariantA.AlleleSupport;
        coverageA = VariantA.TotalCoverage;
    }

    if (VariantB != null)
    {
        refSupportB = VariantB.ReferenceSupport;
        altSupportB = VariantB.IsRefType ? 0 : VariantB.AlleleSupport;
        coverageB = VariantB.TotalCoverage;
    }

    int combinedCoverage = coverageA + coverageB;
    int combinedRefSupport = refSupportA + refSupportB;
    int combinedAltSupport = altSupportA + altSupportB;

    // Frequencies are reported as 0 whenever either the numerator or the denominator is 0.
    double combinedFrequency = (combinedAltSupport != 0 && combinedCoverage != 0)
        ? (double)combinedAltSupport / (double)combinedCoverage
        : 0.0;
    double frequencyA = (altSupportA != 0 && coverageA != 0)
        ? (double)altSupportA / (double)coverageA
        : 0.0;
    double frequencyB = (altSupportB != 0 && coverageB != 0)
        ? (double)altSupportB / (double)coverageB
        : 0.0;

    ConsensusAllele.TotalCoverage = combinedCoverage;
    ConsensusAllele.AlleleSupport = combinedAltSupport;
    ConsensusAllele.ReferenceSupport = combinedRefSupport;

    var consensusGenotype = GetGenotype(
        VariantA, VariantB, Case, combinedCoverage,
        combinedFrequency, frequencyA, frequencyB,
        SampleAggregationOptions, variantCallingParameters);

    ConsensusAllele.NoiseLevelApplied = GetCombinedNLValue(VariantA, VariantB, SampleAggregationOptions);
    ConsensusAllele.StrandBiasResults = GetCombinedSBValue(VariantA, VariantB, SampleAggregationOptions);

    // The genotype may have gone from var -> ref when the results were combined.
    // In that case we no longer want to write "variant" to the .vcf, and the Q scores
    // must be re-calculated against a reference model rather than a variant model.
    bool altBecameRef = PushThroughRamificationsOfGTChange(
        VariantA, VariantB, ConsensusAllele,
        refSupportA, refSupportB, coverageA, coverageB,
        consensusGenotype, variantCallingParameters.MaximumVariantQScore, Case);

    ConsensusAllele.Genotype = consensusGenotype;

    ConsensusAllele.PoolBiasResults = GetProbePoolBiasScore(
        Case, ConsensusAllele, SampleAggregationOptions.ProbePoolBiasThreshold, variantCallingParameters,
        altSupportA, altSupportB, coverageA, coverageB,
        consensusGenotype, altBecameRef);

    if (SampleAggregationOptions.HowToCombineQScore == SampleAggregationParameters.CombineQScoreMethod.TakeMin)
    {
        ConsensusAllele.VariantQscore = CombineQualitiesByTakingMinValue(VariantA, VariantB);
    }
    else
    {
        // CombinePoolsAndReCalculate.
        // The reference Q model (scored on reference support) applies when:
        //   - both pools agreed on reference,
        //   - one ref + one alt, and the alt was re-classified as ref, or
        //   - the calls could not be combined and there is no alt support at all
        //     (so the only call we had must have been ref).
        // Everything else (agreed on alt, or one alt call) uses the variant Q model.
        bool useReferenceModel =
            (Case == VariantComparisonCase.AgreedOnReference) ||
            ((Case == VariantComparisonCase.OneReferenceOneAlternate) && altBecameRef) ||
            ((Case == VariantComparisonCase.CanNotCombine) && (combinedAltSupport == 0));

        int supportingReads = useReferenceModel ? combinedRefSupport : combinedAltSupport;

        ConsensusAllele.VariantQscore = CombineQualitiesByPoolingReads(
            supportingReads, combinedCoverage,
            ConsensusAllele.NoiseLevelApplied, variantCallingParameters.MaximumVariantQScore);
    }

    // assuming this is only used on Somatic...
    ConsensusAllele.GenotypeQscore = ConsensusAllele.VariantQscore;

    ConsensusAllele.SetType();

    // For a reference-type consensus, the allele support is the reference support.
    if (ConsensusAllele.IsRefType)
    {
        ConsensusAllele.AlleleSupport = ConsensusAllele.ReferenceSupport;
    }
}