/// <summary>
/// Chooses, independently for every sample, the genotype with the highest single-sample likelihood
/// and adds the natural log of each of those maxima to the joint likelihood object.
/// No other joint likelihood information is recorded for samples without pedigree information.
/// </summary>
/// <param name="segments">Per-sample segments; only their sample IDs are read here.</param>
/// <param name="singleSampleLikelihoods">Genotype likelihoods for each sample, indexed by sample ID.</param>
/// <returns>
/// The highest-likelihood genotype per sample, together with a joint likelihood object whose
/// MaximalLogLikelihood has been incremented by the log of every per-sample maximum.
/// </returns>
public static (SampleMap<Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetCopyNumbersNoPedigreeInfo(ISampleMap<CanvasSegment> segments, ISampleMap<Dictionary<Genotype, double>> singleSampleLikelihoods)
{
    var likelihoodSummary = new JointLikelihoods();
    var bestGenotypes = new SampleMap<Genotype>();

    foreach (var id in segments.SampleIds)
    {
        // Entry with the largest likelihood value for this sample.
        var (bestGenotype, bestLikelihood) = singleSampleLikelihoods[id].MaxBy(x => x.Value);

        double bestLogLikelihood = Math.Log(bestLikelihood);
        likelihoodSummary.MaximalLogLikelihood += bestLogLikelihood;

        bestGenotypes.Add(id, bestGenotype);
    }

    return (copyNumbersGenotypes: bestGenotypes, jointLikelihood: likelihoodSummary);
}
/// <summary>
/// Perform de-novo CNV calling in two steps:
/// 1. Filter REF variants and common CNVs, this step relies only on total CN calls with associated shortcomings
/// 2. Assign de-novo quality based on joint likelihood across pedigree using marginalisation operations
/// A proband's DqScore is written only when it passes every filter in step 1.
/// </summary>
/// <param name="canvasSegments">Per-sample segments; the proband's segment receives the DqScore.</param>
/// <param name="samplesInfo">Per-sample metrics passed through to the reference/shared-CNV checks and the scoring routine.</param>
/// <param name="parentIDs">Sample IDs of the parents.</param>
/// <param name="offspringIDs">Sample IDs of the offspring; each one is evaluated in turn as the proband.</param>
/// <param name="copyNumbersLikelihoods">Joint likelihoods across the pedigree used to compute the de-novo quality score.</param>
private void SetDenovoQualityScores(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo, List<SampleId> parentIDs, List<SampleId> offspringIDs, JointLikelihoods copyNumbersLikelihoods)
{
    foreach (var probandId in offspringIDs)
    {
        // targeted proband is REF
        if (IsReferenceVariant(canvasSegments, samplesInfo, probandId))
        {
            continue;
        }

        // common variant
        if (CanvasPedigreeCaller.IsSharedCnv(canvasSegments, samplesInfo, parentIDs, probandId, _callerParameters.MaximumCopyNumber))
        {
            continue;
        }

        // other offsprings are ALT
        if (!offspringIDs.Except(probandId.ToEnumerable()).All(id => IsReferenceVariant(canvasSegments, samplesInfo, id)))
        {
            continue;
        }

        // not all q-scores are above the threshold
        if (parentIDs.Concat(probandId).Any(id => !IsPassVariant(canvasSegments, id)))
        {
            continue;
        }

        double deNovoQualityScore = CanvasPedigreeCaller.GetConditionalDeNovoQualityScore(canvasSegments, copyNumbersLikelihoods, samplesInfo, parentIDs, probandId);

        // adjustment so that denovo quality score threshold is 20 (rather than 10) to match Manta
        deNovoQualityScore *= 2;

        // Cap the score at the configured maximum. Conditional OR (||) is used instead of the
        // non-short-circuit | operator; the outcome is the same because neither operand has side effects.
        // Double.IsInfinity is true for both positive and negative infinity.
        if (Double.IsInfinity(deNovoQualityScore) || deNovoQualityScore > _callerParameters.MaxQscore)
        {
            deNovoQualityScore = _callerParameters.MaxQscore;
        }

        canvasSegments[probandId].DqScore = deNovoQualityScore;
    }
}
/// <summary>
/// Converts the marginal de-novo gain/loss likelihoods of a pedigree configuration with a Mendelian
/// conflict into a phred-style quality score (-10 * log10 of a floored probability).
/// </summary>
/// <param name="canvasSegments">Per-sample segments; used for ploidy lookup and the proband's called copy number.</param>
/// <param name="jointLikelihoods">Joint likelihoods supplying the marginal gain/loss and total marginal likelihoods.</param>
/// <param name="samplesInfo">Per-sample metrics used to obtain the ploidy of each segment.</param>
/// <param name="parentIDs">Parent sample IDs; the first and last elements are used as the two parents.</param>
/// <param name="probandId">Sample ID of the offspring being scored.</param>
/// <returns>The quality score; the probability floor of 1e-6 bounds it at 60 from above.</returns>
internal static double GetConditionalDeNovoQualityScore(ISampleMap<CanvasSegment> canvasSegments, JointLikelihoods jointLikelihoods, ISampleMap<SampleMetrics> samplesInfo, List<SampleId> parentIDs, SampleId probandId)
{
    // Probability floor: -10 * log10(1e-6) == 60.
    const double probabilityFloor = 0.000001;

    var firstParentId = parentIDs.First();
    var firstParentReference = Genotype.Create(samplesInfo[firstParentId].GetPloidy(canvasSegments[firstParentId]));
    var secondParentId = parentIDs.Last();
    var secondParentReference = Genotype.Create(samplesInfo[secondParentId].GetPloidy(canvasSegments[secondParentId]));
    int probandPloidy = samplesInfo[probandId].GetPloidy(canvasSegments[probandId]);

    // Every sample is paired with the genotype created from its ploidy for the marginalisation.
    double gainLikelihood = jointLikelihoods.GetMarginalGainDeNovoLikelihood(
        new KeyValuePair<SampleId, Genotype>(probandId, Genotype.Create(probandPloidy)),
        new KeyValuePair<SampleId, Genotype>(firstParentId, firstParentReference),
        new KeyValuePair<SampleId, Genotype>(secondParentId, secondParentReference));
    double lossLikelihood = jointLikelihoods.GetMarginalLossDeNovoLikelihood(
        new KeyValuePair<SampleId, Genotype>(probandId, Genotype.Create(probandPloidy)),
        new KeyValuePair<SampleId, Genotype>(firstParentId, firstParentReference),
        new KeyValuePair<SampleId, Genotype>(secondParentId, secondParentReference));

    // likelihood of proband genotype != ALT given "copyNumberGenotypes" configuration in pedigree with Mendelian conflict
    double denovoProbability;
    if (canvasSegments[probandId].CopyNumber > probandPloidy)
    {
        // Proband call is above ploidy: score the gain, with the loss mass removed from the total.
        denovoProbability = 1 - gainLikelihood / (jointLikelihoods.TotalMarginalLikelihood - lossLikelihood);
    }
    else
    {
        // Otherwise score the loss, with the gain mass removed from the total.
        denovoProbability = 1 - lossLikelihood / (jointLikelihoods.TotalMarginalLikelihood - gainLikelihood);
    }

    double flooredProbability = Math.Max(denovoProbability, probabilityFloor);
    return -10.0 * Math.Log10(flooredProbability);
}
/// <summary>
/// Writes the quality score and total copy number onto every sample's segment, flags segments whose
/// quality score is below the filter threshold, and, when a full pedigree is available, assigns
/// de-novo quality scores.
/// </summary>
/// <param name="canvasSegments">Per-sample segments that are updated in place.</param>
/// <param name="pedigreeMembersInfo">Per-sample metrics forwarded to de-novo scoring.</param>
/// <param name="pedigreeInfo">Pedigree description supplying parent and offspring IDs.</param>
/// <param name="singleSampleLikelihoods">Per-sample genotype likelihoods used for the single-sample quality score.</param>
/// <param name="copyNumberLikelihoods">Joint likelihoods forwarded to de-novo scoring.</param>
/// <param name="copyNumbers">Called genotype for each sample.</param>
private void EstimateQScores(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> pedigreeMembersInfo, PedigreeInfo pedigreeInfo, ISampleMap<Dictionary<Genotype, double>> singleSampleLikelihoods, JointLikelihoods copyNumberLikelihoods, ISampleMap<Genotype> copyNumbers)
{
    foreach (var sampleId in canvasSegments.SampleIds)
    {
        var segment = canvasSegments[sampleId];
        var sampleLikelihoods = singleSampleLikelihoods[sampleId];
        var calledGenotype = copyNumbers[sampleId];

        segment.QScore = GetSingleSampleQualityScore(sampleLikelihoods, calledGenotype);
        segment.CopyNumber = calledGenotype.TotalCopyNumber;

        bool belowThreshold = segment.QScore < _qualityFilterThreshold;
        if (belowThreshold)
        {
            // The filter tag is the letter 'q' followed by the threshold value.
            segment.Filter = CanvasFilter.Create(new[] { $"q{_qualityFilterThreshold}" });
        }
    }

    // De-novo scoring is only attempted for a full pedigree.
    if (!pedigreeInfo.HasFullPedigree())
    {
        return;
    }

    SetDenovoQualityScores(canvasSegments, pedigreeMembersInfo, pedigreeInfo.ParentsIds, pedigreeInfo.OffspringIds, copyNumberLikelihoods);
}
/// <summary>
/// Walks every combination of parent total-CN genotypes and offspring phased genotypes, records the
/// joint likelihood of each evaluated combination, and keeps the pedigree-wide genotype assignment
/// with the highest log likelihood. Offspring terms combine the transition matrix entries for both
/// parents with the offspring's own total-CN likelihood.
/// </summary>
/// <param name="pedigreeInfo">Pedigree description: parent/offspring IDs, offspring phased genotypes and transition matrix.</param>
/// <param name="copyNumbersLikelihoods">Per-sample genotype likelihoods; only the highest-ranked entries per sample are considered.</param>
/// <returns>
/// The best genotype per sample and the populated joint likelihoods. When the pedigree is not full,
/// an empty genotype map and a fresh joint likelihood object are returned.
/// </returns>
/// <exception cref="IlluminaException">
/// Thrown when, after the search, the best-assignment map is still empty.
/// </exception>
private (ISampleMap<Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetPedigreeCopyNumbers(PedigreeInfo pedigreeInfo, ISampleMap<Dictionary<Genotype, double>> copyNumbersLikelihoods)
{
    // Keep only 3 candidates per sample when there are two or more offspring; otherwise keep up to MaximumCopyNumber.
    // NOTE(review): pedigreeInfo is null-checked here but used without a check below; presumably
    // HasFullPedigree() tolerates null - confirm.
    int nHighestLikelihoodGenotypes;
    if (pedigreeInfo != null && pedigreeInfo.OffspringIds.Count >= 2)
    {
        nHighestLikelihoodGenotypes = 3;
    }
    else
    {
        nHighestLikelihoodGenotypes = _callerParameters.MaximumCopyNumber;
    }

    ISampleMap<Dictionary<Genotype, double>> topLikelihoods = copyNumbersLikelihoods.SelectValues(
        perSample => perSample.OrderByDescending(kvp => kvp.Value).Take(nHighestLikelihoodGenotypes).ToDictionary());

    var bestGenotypes = new SampleMap<Genotype>();
    var jointLikelihood = new JointLikelihoods();

    if (!pedigreeInfo.HasFullPedigree())
    {
        return (bestGenotypes, jointLikelihood);
    }

    var firstParentId = pedigreeInfo.ParentsIds.First();
    var secondParentId = pedigreeInfo.ParentsIds.Last();
    var offspringIds = pedigreeInfo.OffspringIds;
    // Offspring total copy numbers are clamped to this value.
    var highestCopyNumber = _callerParameters.MaximumCopyNumber - 1;

    // parent 1 total CNs and likelihoods
    foreach (var parent1Entry in topLikelihoods[firstParentId])
    {
        // parent 2 total CNs and likelihoods
        foreach (var parent2Entry in topLikelihoods[secondParentId])
        {
            // for offspring in addition to querying likelihoods using total CNs, iterate over all possible genotype
            // combination (CopyNumberA/B) for a given CN and estimate likely transition probabilities using TransitionMatrix
            foreach (var offspringGtStates in pedigreeInfo.OffspringPhasedGenotypes)
            {
                bool anyTotalCnUnavailable = offspringIds.Any(id =>
                {
                    var phased = offspringGtStates[offspringIds.IndexOf(id)].PhasedGenotype;
                    var totalCn = Genotype.Create(Math.Min(phased.CopyNumberA + phased.CopyNumberB, highestCopyNumber));
                    return !topLikelihoods[id].ContainsKey(totalCn);
                });
                if (anyTotalCnUnavailable)
                {
                    // unavailable total CN
                    continue;
                }

                // For a given combination of offspring copy numbers, only the genotypes that result in the maximum
                // likelihood contribute to the final result.
                double currentLikelihood = parent1Entry.Value * parent2Entry.Value;
                var offspringTotalCnGenotypes = new List<Genotype>();
                for (var i = 0; i < offspringIds.Count; i++)
                {
                    var childId = offspringIds[i];
                    var phased = offspringGtStates[i].PhasedGenotype;
                    var childTotalCn = Genotype.Create(Math.Min(phased.CopyNumberA + phased.CopyNumberB, highestCopyNumber));
                    offspringTotalCnGenotypes.Add(childTotalCn);

                    currentLikelihood *= pedigreeInfo.TransitionMatrix[parent1Entry.Key.TotalCopyNumber][phased.CopyNumberA] *
                                         pedigreeInfo.TransitionMatrix[parent2Entry.Key.TotalCopyNumber][phased.CopyNumberB] *
                                         topLikelihoods[childId][childTotalCn];
                }

                // Non-finite products are treated as zero likelihood.
                if (Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood))
                {
                    currentLikelihood = 0;
                }

                var genotypesInPedigree = new SampleMap<Genotype>
                {
                    { firstParentId, parent1Entry.Key },
                    { secondParentId, parent2Entry.Key }
                };
                for (var i = 0; i < offspringIds.Count; i++)
                {
                    genotypesInPedigree.Add(offspringIds[i], offspringTotalCnGenotypes[i]);
                }
                genotypesInPedigree = genotypesInPedigree.OrderBy(pedigreeInfo.AllSampleIds);

                jointLikelihood.AddJointLikelihood(genotypesInPedigree, currentLikelihood);

                double currentLogLikelihood = Math.Log(currentLikelihood);
                if (currentLogLikelihood > jointLikelihood.MaximalLogLikelihood)
                {
                    jointLikelihood.MaximalLogLikelihood = currentLogLikelihood;
                    bestGenotypes = genotypesInPedigree;
                }
            }
        }
    }

    if (bestGenotypes.Empty())
    {
        throw new IlluminaException("Maximal likelihood was not found");
    }

    return (bestGenotypes, jointLikelihood);
}