/// <summary>
        /// Combines the calls made at one locus in two pools into a single aggregate allele.
        /// Either input may be null (both are null-checked here), but not both: the scoring
        /// step dereferences one of them unconditionally.
        /// When both are present, the chromosome and position of <paramref name="VariantB"/>
        /// are applied last and therefore win.
        /// We never try to combine two different variant alleles.
        /// </summary>
        /// <param name="VariantA">Call from the first pool, or null.</param>
        /// <param name="VariantB">Call from the second pool, or null.</param>
        /// <param name="ComparisonCase">How the two calls relate; forwarded to the allele-combining and scoring helpers.</param>
        /// <returns>A new <see cref="AggregateAllele"/> built from the two inputs.</returns>
        public AggregateAllele CombineVariants(CalledAllele VariantA, CalledAllele VariantB,
                                               VariantComparisonCase ComparisonCase)
        {
            SampleAggregationParameters sampleAggregationOptions = _options.SampleAggregationParameters;
            var consensus = new AggregateAllele(new List<CalledAllele> { VariantA, VariantB });

            // (A) set the locus data. This should be the same for both pools.
            // A is processed first and B second, so B's values win when both are present.
            foreach (CalledAllele pooledCall in new[] { VariantA, VariantB })
            {
                if (pooledCall == null)
                {
                    continue;
                }

                DoDefensiveGenotyping(pooledCall);
                consensus.Chromosome        = pooledCall.Chromosome;
                consensus.ReferencePosition = pooledCall.ReferencePosition;
            }

            // Normally the reference allele is the same for both, no matter what the case.
            // But if we have one deletion and one not, we might have different ref alleles,
            // so it is always derived from both inputs rather than copied from either one.
            consensus.ReferenceAllele = CombineReferenceAlleles(VariantA, VariantB, ComparisonCase);

            // (B) set the alternate data
            consensus.AlternateAllele = CombineVariantAlleles(VariantA, VariantB, ComparisonCase);

            // (C) set filters, etc.
            consensus.Filters = CombineFilters(VariantA, VariantB);

            // (D) set GT data, includes calculating the probe-pool bias and quality scores
            RecalculateScoring(VariantA, VariantB, ComparisonCase, consensus, sampleAggregationOptions, _options.VariantCallingParams);

            return consensus;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the VCF writer configuration by copying settings out of the variant-calling,
        /// output and sample-aggregation parameter objects. Several nullable thresholds are
        /// stored as null when the condition guarding them is not met.
        /// </summary>
        /// <param name="callerOptions">Variant calling parameters; source of most thresholds.</param>
        /// <param name="outputOptions">VCF writing parameters; source of the reporting switches.</param>
        /// <param name="bamFilterOptions">Not read by this constructor.</param>
        /// <param name="sampleAggregationParameters">Optional; when non-null, probe bias output is enabled and its threshold copied.</param>
        /// <param name="debugMode">Forwarded to ShouldOutputNoiseLevelAndStrandBias.</param>
        /// <param name="outputBiasFiles">Forwarded to ShouldOutputNoiseLevelAndStrandBias.</param>
        /// <param name="hasForcedGT">Stored in HasForcedGt.</param>
        public VcfWriterConfig(VariantCallingParameters callerOptions,
                               VcfWritingParameters outputOptions, BamFilterParameters bamFilterOptions, SampleAggregationParameters sampleAggregationParameters,
                               bool debugMode, bool outputBiasFiles, bool hasForcedGT = false)
        {
            // Depth filter: gVCF output always uses the minimum coverage; otherwise the
            // low-depth filter is only used when it is stricter than the minimum coverage.
            if (outputOptions.OutputGvcfFile)
            {
                DepthFilterThreshold = callerOptions.MinimumCoverage;
            }
            else if (callerOptions.LowDepthFilter > callerOptions.MinimumCoverage)
            {
                DepthFilterThreshold = callerOptions.LowDepthFilter;
            }
            else
            {
                DepthFilterThreshold = null;
            }

            // Indel repeat filter: only a positive value is kept.
            if (callerOptions.IndelRepeatFilter > 0)
            {
                IndelRepeatFilterThreshold = callerOptions.IndelRepeatFilter;
            }
            else
            {
                IndelRepeatFilterThreshold = null;
            }

            VariantQualityFilterThreshold = callerOptions.MinimumVariantQScoreFilter;

            // Genotype quality filter: needs a configured value AND a variant q-score filter
            // that is above the minimum variant q-score.
            bool genotypeQualityFilterActive =
                callerOptions.LowGenotypeQualityFilter.HasValue &&
                callerOptions.MinimumVariantQScoreFilter > callerOptions.MinimumVariantQScore;
            GenotypeQualityFilterThreshold = genotypeQualityFilterActive ? callerOptions.LowGenotypeQualityFilter : null;

            // Strand bias filter: only values below 1 are kept.
            if (callerOptions.StrandBiasAcceptanceCriteria < 1)
            {
                StrandBiasFilterThreshold = callerOptions.StrandBiasAcceptanceCriteria;
            }
            else
            {
                StrandBiasFilterThreshold = null;
            }

            // Amplicon bias filter: only a positive value is kept.
            if (callerOptions.AmpliconBiasFilterThreshold > 0)
            {
                AmpliconBiasFilterThreshold = callerOptions.AmpliconBiasFilterThreshold;
            }
            else
            {
                AmpliconBiasFilterThreshold = null;
            }

            FrequencyFilterThreshold = GetMinFreqFilterForVcfHeader(callerOptions);
            MinFrequencyThreshold = callerOptions.MinimumFrequency;
            ShouldOutputNoCallFraction = outputOptions.ReportNoCalls;
            ShouldOutputStrandBiasAndNoiseLevel = ShouldOutputNoiseLevelAndStrandBias(
                debugMode, outputBiasFiles, callerOptions.StrandBiasAcceptanceCriteria);
            ShouldFilterOnlyOneStrandCoverage = callerOptions.FilterOutVariantsPresentOnlyOneStrand;
            EstimatedBaseCallQuality = callerOptions.NoiseLevelUsedForQScoring;
            ShouldOutputRcCounts = outputOptions.ReportRcCounts;
            ShouldOutputTsCounts = outputOptions.ReportTsCounts;
            AllowMultipleVcfLinesPerLoci = outputOptions.AllowMultipleVcfLinesPerLoci;
            PloidyModel = callerOptions.PloidyModel;

            // RMxN filter settings are copied through unchanged.
            RMxNFilterMaxLengthRepeat = callerOptions.RMxNFilterMaxLengthRepeat;
            RMxNFilterMinRepetitions = callerOptions.RMxNFilterMinRepetitions;
            RMxNFilterFrequencyLimit = callerOptions.RMxNFilterFrequencyLimit;

            NoiseModel = callerOptions.NoiseModel;
            ShouldReportGp = outputOptions.ReportGp;
            NoCallFilterThreshold = callerOptions.NoCallFilterThreshold;
            ShouldOutputSuspiciousCoverageFraction = outputOptions.ReportSuspiciousCoverageFraction;

            // Probe bias is only reported when sample aggregation parameters were supplied.
            if (sampleAggregationParameters != null)
            {
                ShouldOutputProbeBias = true;
                ProbePoolBiasFilterThreshold = sampleAggregationParameters.ProbePoolBiasThreshold;
            }

            HasForcedGt = hasForcedGT;
        }
Esempio n. 3
0
        /// <summary>
        /// Combines a low-frequency SNV call from one pool with a reference call from the other
        /// pool, where the two pools were called with different noise levels, and checks the
        /// resulting consensus scores for both q-score combination methods, including the
        /// cases where one of the two inputs is null.
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_Qscore_DiffentNL_Test()
        {
            //chr3	41266161	.	A	G	30	PASS	DP=3067	GT:GQ:AD:VF:NL:SB	0/1:30:3005,54:0.0176:35:-100.0000
            CalledAllele VarA = new CalledAllele()
            {
                Chromosome        = "chr3",
                ReferencePosition = 41266161,
                TotalCoverage     = 3067,
                Genotype          = Pisces.Domain.Types.Genotype.HeterozygousAltRef,
                VariantQscore     = 30,
                GenotypeQscore    = 30,
                AlleleSupport     = 54,
                ReferenceSupport  = 3005,
                NoiseLevelApplied = 35,
                StrandBiasResults = new Pisces.Domain.Models.BiasResults()
                {
                    GATKBiasScore = -100.0000
                },
                ReferenceAllele = "A",
                AlternateAllele = "G",
                Type            = Pisces.Domain.Types.AlleleCategory.Snv
            };


            //chr3	41266161	.	A	.	75	PASS	DP=3795	GT:GQ:AD:VF:NL:SB	0/0:75:3780:0.0040:2:-100.0000
            CalledAllele VarB = new CalledAllele()
            {
                Chromosome        = "chr3",
                ReferencePosition = 41266161,
                TotalCoverage     = 3795,
                Genotype          = Pisces.Domain.Types.Genotype.HomozygousRef,
                VariantQscore     = 75,
                GenotypeQscore    = 75,
                AlleleSupport     = 3780,
                ReferenceSupport  = 3780,
                NoiseLevelApplied = 2,
                StrandBiasResults = new Pisces.Domain.Models.BiasResults()
                {
                    GATKBiasScore = -100.0000
                },
                ReferenceAllele = "A",
                AlternateAllele = ".",
                Type            = Pisces.Domain.Types.AlleleCategory.Reference
            };



            //old answer
            //chr3	41266161	.	A	.	100.00	PASS	DP=6862;cosmic=COSM1423020,COSM1423021;EVS=0|69.0|6503;phastCons	GT:GQ:AD:VF:NL:SB:PB:GQX	0/0:100:6785:0.0079:35:-100:-100.0000:100

            SampleAggregationParameters SampleAggregationOptions = new SampleAggregationParameters();

            SampleAggregationOptions.ProbePoolBiasThreshold = 0.5f;
            SampleAggregationOptions.HowToCombineQScore     = SampleAggregationParameters.CombineQScoreMethod.CombinePoolsAndReCalculate;

            _basicOptions.BamFilterParams.MinimumBaseCallQuality      = 20;
            _basicOptions.VariantCallingParams.MinimumFrequency       = 0.01f;
            _basicOptions.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            _basicOptions.SampleAggregationParameters = SampleAggregationOptions;

            string consensusOut = Path.Combine(_TestDataPath, "ConsensusOut.vcf");
            VariantComparisonCase ComparisonCase   = VennProcessor.GetComparisonCase(VarA, VarB);
            ConsensusBuilder      consensusBuilder = new ConsensusBuilder(consensusOut, _basicOptions);
            AggregateAllele       consensus        = consensusBuilder.CombineVariants(VarA, VarB, ComparisonCase);

            Console.WriteLine(consensus.ToString());

            double expectedNoiseLevel = MathOperations.PtoQ(
                (MathOperations.QtoP(35) + MathOperations.QtoP(2)) / (2.0)); // 5

            // All assertions below use xUnit's (expected, actual) argument order so that a
            // failure message reports the two values the right way round.

            //GT:GQ:AD:VF:NL:SB:PB:GQX	0/0:100:6785:0.0079:35:-100:-100.0000:100
            Assert.NotNull(consensus);
            Assert.Equal(100, consensus.VariantQscore);
            Assert.Equal("A", consensus.ReferenceAllele);
            Assert.Equal(".", consensus.AlternateAllele);
            Assert.Equal(Pisces.Domain.Types.Genotype.HomozygousRef, consensus.Genotype);
            Assert.Equal(6862, consensus.TotalCoverage);
            Assert.Equal(6785, consensus.ReferenceSupport);
            Assert.Equal(6785, consensus.AlleleSupport);
            Assert.Equal(100, consensus.GenotypeQscore);
            Assert.Equal(0.98877877f, consensus.Frequency);
            Assert.Equal((int)expectedNoiseLevel, consensus.NoiseLevelApplied);
            Assert.Equal(5, consensus.NoiseLevelApplied);
            Assert.Equal(-100, consensus.StrandBiasResults.GATKBiasScore);
            Assert.Equal(-100.0000, consensus.PoolBiasResults.GATKBiasScore);


            //now check, we take the min NL score if we are taking the min Q score.
            // (in this case of combined alt+ref -> ref, the q score will still need to be recalculated,
            //just with the MIN NL.)
            SampleAggregationOptions.HowToCombineQScore = SampleAggregationParameters.CombineQScoreMethod.TakeMin;
            ComparisonCase = VennProcessor.GetComparisonCase(VarA, VarB);
            consensus      = consensusBuilder.CombineVariants(VarA, VarB, ComparisonCase);
            Assert.Equal(2, consensus.NoiseLevelApplied);
            Assert.Equal(100, consensus.VariantQscore);


            //ok, now sanity check we dont barf if either input is null:
            ComparisonCase = VennProcessor.GetComparisonCase(VarA, null);
            consensus      = consensusBuilder.CombineVariants(VarA, null, ComparisonCase);
            Assert.Equal(35, consensus.NoiseLevelApplied);
            Assert.Equal(100, consensus.VariantQscore);

            ComparisonCase = VennProcessor.GetComparisonCase(null, VarB);
            consensus      = consensusBuilder.CombineVariants(null, VarB, ComparisonCase);
            Assert.Equal(2, consensus.NoiseLevelApplied);
            Assert.Equal(100, consensus.VariantQscore);

            //ok, lets check this again, for the PoolQScores option.
            //sanity check we dont barf if either input is null:
            SampleAggregationOptions.HowToCombineQScore = SampleAggregationParameters.CombineQScoreMethod.CombinePoolsAndReCalculate;
            ComparisonCase = VennProcessor.GetComparisonCase(VarA, null);
            consensus      = consensusBuilder.CombineVariants(VarA, null, ComparisonCase);
            Assert.Equal(35, consensus.NoiseLevelApplied);
            Assert.Equal(100, consensus.VariantQscore);//low freq variant -> nocall. note, qscore would be 41 if NL = 20.

            ComparisonCase = VennProcessor.GetComparisonCase(null, VarB);
            consensus      = consensusBuilder.CombineVariants(null, VarB, ComparisonCase);
            Assert.Equal(2, consensus.NoiseLevelApplied);
            Assert.Equal(100, consensus.VariantQscore); //solid ref
        }
        /// <summary>
        /// Derives the genotype of the combined call from which alleles (reference and/or
        /// alternate) the two pool calls report, then downgrades that genotype when the
        /// combined evidence does not meet the configured frequency or coverage minimums.
        /// </summary>
        /// <param name="VariantA">Call from the first pool, or null.</param>
        /// <param name="VariantB">Call from the second pool, or null.</param>
        /// <param name="Case">Comparison case; selects between the frequency checks and the depth check.</param>
        /// <param name="TotalDepth">Combined coverage of both pools.</param>
        /// <param name="VarFrequency">Variant frequency over the combined pools.</param>
        /// <param name="VarFrequencyA">Variant frequency in pool A.</param>
        /// <param name="VarFrequencyB">Variant frequency in pool B.</param>
        /// <param name="SampleAggregationOptions">Not read by this method.</param>
        /// <param name="variantCallingParameters">Supplies MinimumFrequency, MinimumFrequencyFilter and MinimumCoverage.</param>
        /// <returns>The genotype to report for the combined call.</returns>
        private static Genotype GetGenotype(CalledAllele VariantA, CalledAllele VariantB, VariantComparisonCase Case,
                                            int TotalDepth, double VarFrequency, double VarFrequencyA, double VarFrequencyB, SampleAggregationParameters SampleAggregationOptions, VariantCallingParameters variantCallingParameters)
        {
            //cases:  {0/0 , 0/1,  1/1, ./.} , choose 2.

            bool refPresent = ((VariantA != null && VariantA.HasARefAllele) || (VariantB != null && VariantB.HasARefAllele));
            bool altPresent = ((VariantA != null && VariantA.HasAnAltAllele) || (VariantB != null && VariantB.HasAnAltAllele));

            //no alt and no reference detected: its a no call, thats fine. we are done
            if (!altPresent && !refPresent)
            {
                return Genotype.RefLikeNoCall;
            }

            Genotype mergedGT;
            if (altPresent)
            {
                //todo, expand to cover nocalls and heterozygous calls.
                mergedGT = refPresent ? Genotype.HeterozygousAltRef : Genotype.HomozygousAlt;
            }
            else
            {
                mergedGT = Genotype.HomozygousRef;
            }

            //if the merged GT implies a variant call,
            //it has to pass some minimal criteria, or it gets
            //re-classified as a ref type or a no-call.

            if (Case != VariantComparisonCase.AgreedOnReference)
            {
                //Never call it a variant if the combined freq is below the thresholds:
                // combined freq < MinimumFrequency and both per-pool freqs < MinimumFrequencyFilter -> ref
                // combined freq < MinimumFrequency and a per-pool freq >= MinimumFrequencyFilter    -> alt-like no call
                // MinimumFrequency <= combined freq < MinimumFrequencyFilter                        -> alt-like no call
                if (VarFrequency < variantCallingParameters.MinimumFrequency)
                {
                    bool lowInBothPools =
                        (VarFrequencyA < variantCallingParameters.MinimumFrequencyFilter) &&
                        (VarFrequencyB < variantCallingParameters.MinimumFrequencyFilter);

                    mergedGT = lowInBothPools ? Genotype.HomozygousRef : Genotype.AltLikeNoCall;
                }
                else if (VarFrequency < variantCallingParameters.MinimumFrequencyFilter)
                {
                    mergedGT = Genotype.AltLikeNoCall;
                }

                //next - we have to clean up any multiple allelic sites.
            }
            // Dont call it a reference if the combined depth is less than the minimum.
            // (This is defensive programming. The caller should already have made each pool
            // call a no-call due to individual low depth.)
            // NOTE(review): as an else-branch this check only runs for AgreedOnReference; the
            // original comment reads as if it was meant for variant calls too - confirm intent.
            else if (TotalDepth < variantCallingParameters.MinimumCoverage)
            {
                mergedGT = Genotype.RefLikeNoCall;
            }

            return mergedGT;
        }
        /// <summary>
        /// Returns the noise level to report for the combined call.
        /// With a single contributing pool, that pool's noise level is returned unchanged.
        /// With two pools, the result is either the smaller of the two noise levels (TakeMin)
        /// or the value produced by CombineNoiseLevelsByTakingAvgP.
        /// </summary>
        /// <param name="VariantA">Call from the first pool, or null.</param>
        /// <param name="VariantB">Call from the second pool, or null.</param>
        /// <param name="SampleAggregationOptions">Supplies HowToCombineQScore; only read when both calls are present.</param>
        private static int GetCombinedNLValue(CalledAllele VariantA, CalledAllele VariantB, SampleAggregationParameters SampleAggregationOptions)
        {
            // Only one pool made a call: nothing to combine.
            if (VariantA == null || VariantB == null)
            {
                CalledAllele onlyCall = VariantA ?? VariantB;
                return onlyCall.NoiseLevelApplied;
            }

            bool takeMinimum = SampleAggregationOptions.HowToCombineQScore == SampleAggregationParameters.CombineQScoreMethod.TakeMin;
            int noiseLevelA = VariantA.NoiseLevelApplied;
            int noiseLevelB = VariantB.NoiseLevelApplied;

            return takeMinimum
                ? Math.Min(noiseLevelA, noiseLevelB)
                : CombineNoiseLevelsByTakingAvgP(noiseLevelA, noiseLevelB);
        }
        /// <summary>
        /// Picks the strand-bias result to report for the combined call.
        /// With a single contributing pool, that pool's BiasResults instance is returned as-is
        /// (the same object, not a copy). With two pools, a new BiasResults is returned in which
        /// only GATKBiasScore is set, to the larger of the two pool scores.
        /// </summary>
        /// <param name="VariantA">Call from the first pool, or null.</param>
        /// <param name="VariantB">Call from the second pool, or null.</param>
        /// <param name="SampleAggregationOptions">Not read by this method.</param>
        private static BiasResults GetCombinedSBValue(CalledAllele VariantA, CalledAllele VariantB, SampleAggregationParameters SampleAggregationOptions)
        {
            if (VariantA == null)
            {
                return VariantB.StrandBiasResults;
            }

            if (VariantB == null)
            {
                return VariantA.StrandBiasResults;
            }

            // Allocate only on the two-pool path; the single-pool paths above never need it.
            return new BiasResults
            {
                GATKBiasScore = Math.Max(VariantA.StrandBiasResults.GATKBiasScore, VariantB.StrandBiasResults.GATKBiasScore)
            };
        }
        /// <summary>
        /// Fills in the depth, support, genotype, noise level, bias results and q-scores of
        /// <paramref name="ConsensusAllele"/> from the two pool calls.
        /// </summary>
        /// <param name="VariantA">Call from the first pool, or null.</param>
        /// <param name="VariantB">Call from the second pool, or null.</param>
        /// <param name="Case">Comparison case for the two calls; selects which read count feeds the pooled q-score.</param>
        /// <param name="ConsensusAllele">The aggregate allele being populated (mutated in place).</param>
        /// <param name="SampleAggregationOptions">Supplies HowToCombineQScore and ProbePoolBiasThreshold.</param>
        /// <param name="variantCallingParameters">Supplies MaximumVariantQScore and is forwarded to the genotype and pool-bias helpers.</param>
        private static void RecalculateScoring(CalledAllele VariantA, CalledAllele VariantB,
                                               VariantComparisonCase Case,
                                               AggregateAllele ConsensusAllele, SampleAggregationParameters SampleAggregationOptions, VariantCallingParameters variantCallingParameters)
        {
            // 1) Per-pool component values. A missing pool contributes zeros.
            //    A reference-type call contributes no alt support.
            int refSupportA = 0, altSupportA = 0, coverageA = 0;
            if (VariantA != null)
            {
                refSupportA = VariantA.ReferenceSupport;
                altSupportA = VariantA.IsRefType ? 0 : VariantA.AlleleSupport;
                coverageA   = VariantA.TotalCoverage;
            }

            int refSupportB = 0, altSupportB = 0, coverageB = 0;
            if (VariantB != null)
            {
                refSupportB = VariantB.ReferenceSupport;
                altSupportB = VariantB.IsRefType ? 0 : VariantB.AlleleSupport;
                coverageB   = VariantB.TotalCoverage;
            }

            // 2) Pooled values.
            int pooledCoverage   = coverageA + coverageB;
            int pooledRefSupport = refSupportA + refSupportB;
            int pooledAltSupport = altSupportA + altSupportB;

            // Frequencies default to zero whenever there is no alt support or no coverage.
            double pooledFrequency = 0.0;
            if (pooledAltSupport != 0 && pooledCoverage != 0)
            {
                pooledFrequency = (double)pooledAltSupport / pooledCoverage;
            }

            double frequencyA = 0.0;
            if (altSupportA != 0 && coverageA != 0)
            {
                frequencyA = (double)altSupportA / coverageA;
            }

            double frequencyB = 0.0;
            if (altSupportB != 0 && coverageB != 0)
            {
                frequencyB = (double)altSupportB / coverageB;
            }

            ConsensusAllele.TotalCoverage    = pooledCoverage;
            ConsensusAllele.AlleleSupport    = pooledAltSupport;
            ConsensusAllele.ReferenceSupport = pooledRefSupport;

            var combinedGenotype = GetGenotype(VariantA, VariantB, Case,
                                               pooledCoverage, pooledFrequency, frequencyA, frequencyB,
                                               SampleAggregationOptions, variantCallingParameters);

            ConsensusAllele.NoiseLevelApplied = GetCombinedNLValue(VariantA, VariantB, SampleAggregationOptions);
            ConsensusAllele.StrandBiasResults = GetCombinedSBValue(VariantA, VariantB, SampleAggregationOptions);

            // The genotype may have gone from variant to reference when the pools were combined.
            // In that case we no longer want to write "variant" to the .vcf, and the q-scores
            // must be based on a reference model rather than a variant model.
            bool altBecameRef = PushThroughRamificationsOfGTChange(VariantA, VariantB,
                                                                   ConsensusAllele, refSupportA, refSupportB, coverageA, coverageB, combinedGenotype,
                                                                   variantCallingParameters.MaximumVariantQScore, Case);

            ConsensusAllele.Genotype        = combinedGenotype;
            ConsensusAllele.PoolBiasResults = GetProbePoolBiasScore(Case, ConsensusAllele,
                                                                    SampleAggregationOptions.ProbePoolBiasThreshold, variantCallingParameters,
                                                                    altSupportA, altSupportB, coverageA, coverageB, combinedGenotype, altBecameRef);

            // 3) Variant q-score.
            bool takeMinimumQ = SampleAggregationOptions.HowToCombineQScore == SampleAggregationParameters.CombineQScoreMethod.TakeMin;
            if (takeMinimumQ)
            {
                ConsensusAllele.VariantQscore = CombineQualitiesByTakingMinValue(VariantA, VariantB);
            }
            else //CombineQScoreMethod.CombinePoolsAndReCalculate
            {
                // The reference Q model (score on reference reads) applies when:
                //  - both pools agreed on reference, or
                //  - one ref + one alt and the combined call changed to ref, or
                //  - the calls could not be combined and there is no alt support
                //    (so the only call we had must have been ref).
                // Everything else (agreed on alt, or one alt call) uses the variant Q model.
                bool useReferenceModel =
                    Case == VariantComparisonCase.AgreedOnReference ||
                    (Case == VariantComparisonCase.OneReferenceOneAlternate && altBecameRef) ||
                    (Case == VariantComparisonCase.CanNotCombine && pooledAltSupport == 0);

                int supportingReads = useReferenceModel ? pooledRefSupport : pooledAltSupport;

                ConsensusAllele.VariantQscore = CombineQualitiesByPoolingReads(supportingReads, pooledCoverage, ConsensusAllele.NoiseLevelApplied,
                                                                               variantCallingParameters.MaximumVariantQScore);
            }

            //assuming this is only used on Somatic...
            ConsensusAllele.GenotypeQscore = ConsensusAllele.VariantQscore;
            ConsensusAllele.SetType();

            // For a reference-type result the allele support is reported as the reference support.
            if (ConsensusAllele.IsRefType)
            {
                ConsensusAllele.AlleleSupport = ConsensusAllele.ReferenceSupport;
            }
        }