/// <summary>
/// Chooses between the two alternative segmentations (SetA / SetB) of an overlapping region and
/// applies the choice to every sample. If one alternative is missing the other is used directly;
/// otherwise SetA is chosen only when its log likelihood is strictly higher than SetB's.
/// </summary>
/// <param name="canvasSegmentsSet">Per-sample overlapping regions; their selected set is updated in place.</param>
/// <param name="pedigreeMembersInfo">Per-sample metrics forwarded to the likelihood calculation.</param>
/// <param name="model">Per-sample copy number models forwarded to the likelihood calculation.</param>
private void GetHighestLogLikelihoodSegmentsSet(ISampleMap<OverlappingSegmentsRegion> canvasSegmentsSet,
    ISampleMap<SampleMetrics> pedigreeMembersInfo, ISampleMap<ICopyNumberModel> model)
{
    // Only the first sample is inspected to find out which alternatives exist.
    var firstRegion = canvasSegmentsSet.Values.First();

    SegmentsSet selectedSet;
    if (firstRegion.SetA == null)
    {
        selectedSet = SegmentsSet.SetB;
    }
    else if (firstRegion.SetB == null)
    {
        selectedSet = SegmentsSet.SetA;
    }
    else
    {
        // SetA is scored first, then SetB; ties go to SetB.
        double setALogLikelihood = GetSegmentSetLogLikelihood(canvasSegmentsSet, pedigreeMembersInfo, model, SegmentsSet.SetA);
        double setBLogLikelihood = GetSegmentSetLogLikelihood(canvasSegmentsSet, pedigreeMembersInfo, model, SegmentsSet.SetB);
        selectedSet = setALogLikelihood > setBLogLikelihood ? SegmentsSet.SetA : SegmentsSet.SetB;
    }

    foreach (var sampleId in canvasSegmentsSet.SampleIds)
    {
        canvasSegmentsSet[sampleId].SetSet(selectedSet);
    }
}
/// <summary>
/// Computes, for every sample, the likelihood of each total copy number in
/// [0, _maximumCopyNumber) given the segment coverage. Coverage is the truncated median bin count
/// (5 bins trimmed), capped at three times the sample's mean coverage.
/// Plain likelihoods are returned; NaN or infinite values are replaced by 0.
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="samplesInfo">Per-sample metrics; MeanCoverage is used for the coverage cap.</param>
/// <param name="copyNumberModel">Per-sample model that evaluates the likelihood.</param>
/// <returns>Per-sample dictionary from copy number genotype to likelihood.</returns>
public ISampleMap<Dictionary<Genotype, double>> GetCopyNumbersLikelihoods(ISampleMap<CanvasSegment> canvasSegments,
    ISampleMap<SampleMetrics> samplesInfo, ISampleMap<ICopyNumberModel> copyNumberModel)
{
    const int bins2Remove = 5;
    const double maxCoverageMultiplier = 3.0;

    var candidateGenotypes = Enumerable.Range(0, _maximumCopyNumber).Select(Genotype.Create).ToList();
    var likelihoodsBySample = new SampleMap<Dictionary<Genotype, double>>();

    foreach (var sampleId in canvasSegments.SampleIds)
    {
        var likelihoodByGenotype = new Dictionary<Genotype, double>();
        foreach (var genotype in candidateGenotypes)
        {
            double likelihood = copyNumberModel[sampleId].GetTotalCopyNumberLikelihoods(
                Math.Min(canvasSegments[sampleId].TruncatedMedianCount(bins2Remove),
                    samplesInfo[sampleId].MeanCoverage * maxCoverageMultiplier),
                genotype);

            // Unusable values are stored as zero likelihood.
            if (Double.IsNaN(likelihood) || Double.IsInfinity(likelihood))
            {
                likelihood = 0;
            }

            likelihoodByGenotype[genotype] = likelihood;
        }

        likelihoodsBySample.Add(sampleId, likelihoodByGenotype);
    }

    return likelihoodsBySample;
}
/// <summary>
/// Given a set canvasSegmentsSet with two alternative segmentation hypotheses (SegmentsSet: SetA and SetB),
/// returns the log likelihood of the hypothesis specified by segmentsSet, computed as the sum over its
/// segments of the maximal log likelihood obtained without pedigree information.
/// Side effect: every sample's region is switched to <paramref name="segmentsSet"/> and left that way.
/// </summary>
/// <param name="canvasSegmentsSet">Per-sample overlapping regions holding both hypotheses.</param>
/// <param name="samplesInfo">Per-sample metrics forwarded to the likelihood calculator.</param>
/// <param name="copyNumberModel">Per-sample copy number models forwarded to the likelihood calculator.</param>
/// <param name="segmentsSet">Hypothesis to score.</param>
/// <returns>Summed maximal log likelihood over all segments of the hypothesis.</returns>
private double GetSegmentSetLogLikelihood(ISampleMap<OverlappingSegmentsRegion> canvasSegmentsSet,
    ISampleMap<SampleMetrics> samplesInfo, ISampleMap<ICopyNumberModel> copyNumberModel, SegmentsSet segmentsSet)
{
    foreach (var sampleId in canvasSegmentsSet.SampleIds)
    {
        canvasSegmentsSet[sampleId].SetSet(segmentsSet);
    }

    // The segment count is taken from the first sample; every sample is indexed with the same range.
    int segmentCount = canvasSegmentsSet.First().Value.GetSet().Count;

    // Regroup "per sample -> segments" into "per segment -> samples".
    var segmentsByIndex = Enumerable.Range(0, segmentCount)
        .Select(segmentIndex =>
        {
            var segmentBySample = new SampleMap<CanvasSegment>();
            foreach (var id in canvasSegmentsSet.SampleIds)
            {
                segmentBySample.Add(id, canvasSegmentsSet[id].GetSet()[segmentIndex]);
            }
            return segmentBySample;
        })
        .ToList();

    double totalLogLikelihood = 0;
    foreach (var segmentBySample in segmentsByIndex)
    {
        var copyNumbersLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(segmentBySample, samplesInfo,
            copyNumberModel, _callerParameters.NumberOfTrimmedBins);
        var (_, jointLikelihoods) = GetCopyNumbersNoPedigreeInfo(segmentBySample, copyNumbersLikelihoods);
        totalLogLikelihood += jointLikelihoods.MaximalLogLikelihood;
    }

    return totalLogLikelihood;
}
/// <summary>
/// Assigns MajorChromosomeCount (MCC) per sample from the already-called CopyNumber, without pedigree information.
/// Copy number 2 clears MCC, copy numbers below 2 set MCC to the copy number, and higher copy numbers
/// use the best-scoring phased genotype from <paramref name="genotypes"/> (if one is selected).
/// </summary>
/// <param name="canvasSegments">One segment per sample; MajorChromosomeCount and its score are updated in place.</param>
/// <param name="model">Per-sample copy number models used for genotype scoring.</param>
/// <param name="genotypes">Candidate phased genotypes keyed by total copy number.</param>
private void AssignMccNoPedigreeInfo(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<ICopyNumberModel> model,
    Dictionary<int, List<PhasedGenotype>> genotypes)
{
    const int diploidCopyNumber = 2;

    foreach (var sampleId in canvasSegments.SampleIds)
    {
        var segment = canvasSegments[sampleId];
        int copyNumber = segment.CopyNumber;

        // variant caller does not attempt to call LOH, for DELs CN=MCC
        if (copyNumber == diploidCopyNumber)
        {
            segment.MajorChromosomeCount = null;
            continue;
        }

        if (copyNumber < diploidCopyNumber)
        {
            segment.MajorChromosomeCount = copyNumber;
            continue;
        }

        var candidateGenotypes = genotypes[copyNumber];
        int? bestGenotypeIndex = null;
        double genotypeScore = GetGtLogLikelihoodScore(segment.Balleles, candidateGenotypes, ref bestGenotypeIndex, model[sampleId]);
        if (!bestGenotypeIndex.HasValue)
        {
            // No genotype selected: MCC and its score are left untouched.
            continue;
        }

        var bestGenotype = candidateGenotypes[bestGenotypeIndex.Value];
        segment.MajorChromosomeCount = Math.Max(bestGenotype.CopyNumberA, bestGenotype.CopyNumberB);
        segment.MajorChromosomeCountScore = genotypeScore;
    }
}
/// <summary>
/// Merges neighbouring segments jointly across samples. For every segment index the copy numbers of all
/// samples and the average QScore across samples are collected and handed to
/// CanvasSegment.MergeSegments, which is run once per sample with the same shared inputs.
/// </summary>
/// <param name="segments">Per-sample segment lists; the first sample's count is used as the segment count for all samples.</param>
/// <param name="minimumCallSize">Forwarded to CanvasSegment.MergeSegments.</param>
/// <param name="qScoreThreshold">Forwarded to CanvasSegment.MergeSegments.</param>
/// <returns>Per-sample merged segment lists.</returns>
private static ISampleMap<List<CanvasSegment>> MergeSegments(ISampleMap<List<CanvasSegment>> segments, int minimumCallSize,
    int qScoreThreshold)
{
    int nSegments = segments.First().Value.Count;
    var copyNumbers = new List<List<int>>(nSegments);
    var qscores = new List<double>(nSegments);

    // Both lists receive exactly one entry per segment index, so they are never null and always have
    // nSegments entries; no further validation of them is needed before merging.
    foreach (int segmentIndex in Enumerable.Range(0, nSegments))
    {
        copyNumbers.Add(segments.Select(s => s.Value[segmentIndex].CopyNumber).ToList());
        qscores.Add(segments.Select(s => s.Value[segmentIndex].QScore).Average());
    }

    var mergedSegments = new SampleMap<List<CanvasSegment>>();
    foreach (var sampleSegments in segments)
    {
        // NOTE(review): 10000 is passed as the third argument of CanvasSegment.MergeSegments; its meaning
        // is not visible here - confirm against that method before changing it.
        var mergedSegmentsThisSample = CanvasSegment.MergeSegments(sampleSegments.Value.ToList(), minimumCallSize, 10000,
            copyNumbers, qscores, qScoreThreshold);
        mergedSegments.Add(sampleSegments.Key, mergedSegmentsThisSample);
    }

    return mergedSegments;
}
/// <summary>
/// Writes a multi-sample VCF to <paramref name="outVcfPath"/>: the header is built from the first sample's
/// segments and the average diploid coverage, then one variant record set is written per group of
/// segments zipped across samples.
/// </summary>
/// <param name="outVcfPath">Output path handed to BgzipOrStreamWriter.</param>
/// <param name="segments">Per-sample segment lists.</param>
/// <param name="diploidCoverage">Per-sample diploid coverage; only the average is used.</param>
/// <param name="wholeGenomeFastaDirectory">Forwarded to the header writer.</param>
/// <param name="sampleNames">Forwarded to the header writer.</param>
/// <param name="extraHeaders">Forwarded to the header writer.</param>
/// <param name="ploidies">Forwarded to the variant writer.</param>
/// <param name="qualityThreshold">Forwarded to the header writer.</param>
/// <param name="denovoQualityThreshold">Forwarded to both the header and the variant writer.</param>
/// <param name="sizeThreshold">Forwarded to the header writer.</param>
/// <param name="isPedigreeInfoSupplied">Not used by this method.</param>
public static void WriteMultiSampleSegments(string outVcfPath, ISampleMap<List<CanvasSegment>> segments,
    List<double> diploidCoverage, string wholeGenomeFastaDirectory, List<string> sampleNames, List<string> extraHeaders,
    List<PloidyInfo> ploidies, int qualityThreshold, int? denovoQualityThreshold, int? sizeThreshold,
    bool isPedigreeInfoSupplied = true)
{
    using (var writer = new BgzipOrStreamWriter(outVcfPath))
    {
        var firstSampleSegments = segments.Values.First();
        double averageDiploidCoverage = diploidCoverage.Average();

        var genome = WriteVcfHeader(firstSampleSegments, averageDiploidCoverage, wholeGenomeFastaDirectory, sampleNames,
            extraHeaders, writer, qualityThreshold, denovoQualityThreshold, sizeThreshold);

        var segmentsAcrossSamples = segments.Zip();
        WriteVariants(segmentsAcrossSamples, ploidies, genome, writer, denovoQualityThreshold);
    }
}
/// <summary>
/// Calls copy numbers for the samples listed in pedigreeInfo.OtherIds, using their single-sample
/// likelihoods only (no pedigree information).
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="pedigreeInfo">Supplies OtherIds, the samples to be called here.</param>
/// <param name="singleSampleCopyNumberLogLikelihoods">Per-sample likelihoods keyed by genotype.</param>
/// <returns>Selected genotype for each of the "other" samples.</returns>
public static SampleMap<Genotype> GetNonPedigreeCopyNumbers(ISampleMap<CanvasSegment> canvasSegments, PedigreeInfo pedigreeInfo,
    ISampleMap<Dictionary<Genotype, double>> singleSampleCopyNumberLogLikelihoods)
{
    bool IsOutsidePedigree(SampleId sampleId)
    {
        return pedigreeInfo.OtherIds.Contains(sampleId);
    }

    var otherSegments = canvasSegments.WhereSampleIds(IsOutsidePedigree);
    var otherLikelihoods = singleSampleCopyNumberLogLikelihoods.WhereSampleIds(IsOutsidePedigree);

    // Only the genotypes are needed; the joint likelihood object is discarded.
    var (otherCopyNumbers, _) = GetCopyNumbersNoPedigreeInfo(otherSegments, otherLikelihoods);
    return otherCopyNumbers;
}
/// <summary>
/// Derives metrics from b-allele counts within each segment and determines whether to use them for calculating MCC.
/// For every sample, counts the b-allele entries whose total coverage is at least
/// <paramref name="minAlleleCountsThreshold"/> (a segment without Balleles counts as 0).
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="minAlleleCountsThreshold">Minimum total coverage for an allele entry to be counted.</param>
/// <param name="minAlleleNumberInSegment">Minimum number of counted entries required in every sample.</param>
/// <returns>True when every sample reaches <paramref name="minAlleleNumberInSegment"/> counted entries.</returns>
public static bool UseAlleleCountsInformation(ISampleMap<CanvasSegment> canvasSegments, int minAlleleCountsThreshold,
    int minAlleleNumberInSegment)
{
    // allele read coverage check
    var wellCoveredAlleleCounts = new List<int>();
    foreach (var segment in canvasSegments.Values)
    {
        var totalCoverage = segment.Balleles?.TotalCoverage;
        int wellCovered = totalCoverage?.Count(coverage => coverage >= minAlleleCountsThreshold) ?? 0;
        wellCoveredAlleleCounts.Add(wellCovered);
    }

    // number of SNVs in a segment check
    return wellCoveredAlleleCounts.All(count => count >= minAlleleNumberInSegment);
}
/// <summary>
/// Records the likelihood of a genotype combination. A combination seen for the first time is stored and
/// added to TotalMarginalLikelihood. For a combination already stored, only a strictly larger likelihood
/// replaces the stored value, and the total is increased by the difference; otherwise nothing changes.
/// </summary>
/// <param name="samplesGenotypes">Genotype assignment across samples, used as the dictionary key.</param>
/// <param name="likelihood">Likelihood of that assignment.</param>
public void AddJointLikelihood(ISampleMap<Genotype> samplesGenotypes, double likelihood)
{
    // Single lookup instead of ContainsKey followed by indexer reads.
    if (_jointLikelihoods.TryGetValue(samplesGenotypes, out var storedLikelihood))
    {
        if (storedLikelihood < likelihood)
        {
            TotalMarginalLikelihood = TotalMarginalLikelihood + (likelihood - storedLikelihood);
            _jointLikelihoods[samplesGenotypes] = likelihood;
        }

        return;
    }

    TotalMarginalLikelihood = TotalMarginalLikelihood + likelihood;
    _jointLikelihoods[samplesGenotypes] = likelihood;
}
/// <summary>
/// Calculates maximal likelihood for genotypes given a copy number call and updates MajorChromosomeCount.
/// Every pair of candidate parental phased genotypes (taken from _genotypes at each parent's copy number)
/// is scored together with the best genotype of each offspring; whenever a pair strictly improves on the
/// best log likelihood so far, MCC is (re)assigned for both parents and for each offspring with a genotype.
/// </summary>
/// <param name="canvasSegments">One segment per sample; updated in place through AssignMcc.</param>
/// <param name="model">Per-sample copy number models.</param>
/// <param name="pedigreeInfo">Supplies parent ids (first and last entry of ParentsIds) and offspring ids.</param>
private void AssignMccWithPedigreeInfo(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<ICopyNumberModel> model,
    PedigreeInfo pedigreeInfo)
{
    var parent1Id = pedigreeInfo.ParentsIds.First();
    var parent2Id = pedigreeInfo.ParentsIds.Last();
    var parent1Segment = canvasSegments[parent1Id];
    var parent2Segment = canvasSegments[parent2Id];
    int parent1CopyNumber = parent1Segment.CopyNumber;
    int parent2CopyNumber = parent2Segment.CopyNumber;

    double bestPedigreeLogLikelihood = Double.NegativeInfinity;

    foreach (var parent1Genotype in _genotypes[parent1CopyNumber])
    {
        foreach (var parent2Genotype in _genotypes[parent2CopyNumber])
        {
            var childGenotypes = new List<PhasedGenotype>();
            double pedigreeLogLikelihood = 0;

            foreach (SampleId childId in pedigreeInfo.OffspringIds)
            {
                var childSegment = canvasSegments[childId];
                int childCopyNumber = childSegment.CopyNumber;
                // A child without a de novo quality score is treated as carrying an inherited CNV.
                bool isInheritedCnv = !childSegment.DqScore.HasValue;

                PhasedGenotype childGenotype = null;
                double childLogLikelihood = GetProbandLogLikelihood(model[childId], childCopyNumber, parent1Genotype,
                    parent2Genotype, isInheritedCnv, childSegment, Double.NegativeInfinity, ref childGenotype);

                childGenotypes.Add(childGenotype);
                pedigreeLogLikelihood += childLogLikelihood;
            }

            pedigreeLogLikelihood += GetCurrentGtLogLikelihood(model[parent1Id], parent1Segment, parent1Genotype) +
                                     GetCurrentGtLogLikelihood(model[parent2Id], parent2Segment, parent2Genotype);

            // NaN / infinite totals can never win the comparison below.
            if (Double.IsNaN(pedigreeLogLikelihood) || Double.IsInfinity(pedigreeLogLikelihood))
            {
                pedigreeLogLikelihood = Double.NegativeInfinity;
            }

            if (!(pedigreeLogLikelihood > bestPedigreeLogLikelihood))
            {
                continue;
            }

            bestPedigreeLogLikelihood = pedigreeLogLikelihood;
            AssignMcc(parent1Segment, model[parent1Id], parent1Genotype, parent1CopyNumber);
            AssignMcc(parent2Segment, model[parent2Id], parent2Genotype, parent2CopyNumber);

            for (int childIndex = 0; childIndex < pedigreeInfo.OffspringIds.Count; childIndex++)
            {
                var childId = pedigreeInfo.OffspringIds[childIndex];
                var childGenotype = childGenotypes[childIndex];
                if (childGenotype == null)
                {
                    continue;
                }

                var childSegment = canvasSegments[childId];
                AssignMcc(childSegment, model[childId], childGenotype, childSegment.CopyNumber);
            }
        }
    }
}
/// <summary>
/// Flags every segment shorter than CanvasFilter.SegmentSizeCutoff by adding the CNV size filter to it.
/// </summary>
/// <param name="segments">Per-sample segment lists; matching segments get their Filter replaced in place.</param>
private void FilterExcessivelyShortSegments(ISampleMap<List<CanvasSegment>> segments)
{
    string sizeFilter = CanvasFilter.GetCnvSizeFilter(CanvasFilter.SegmentSizeCutoff);

    var shortSegments = segments.Values
        .SelectMany(sampleSegments => sampleSegments)
        .Where(segment => segment.Length < CanvasFilter.SegmentSizeCutoff);

    foreach (var shortSegment in shortSegments)
    {
        shortSegment.Filter = shortSegment.Filter.AddFilter(sizeFilter);
    }
}
/// <summary>
/// Decides whether the proband's CNV is shared with its parents. When the proband has a phased genotype,
/// the CNV is shared if its A allele is shared with one parent and its B allele with the other (either
/// way round). Without a phased genotype the decision falls back to the total-copy-number overload.
/// </summary>
/// <param name="copyNumberGenotypes">Called genotype per sample.</param>
/// <param name="canvasSegments">One segment per sample (used by the fallback).</param>
/// <param name="samplesInfo">Per-sample metrics (used by the fallback).</param>
/// <param name="parentIDs">Parent ids; the first and last entries are used.</param>
/// <param name="probandId">Id of the proband.</param>
/// <param name="maximumCopyNumber">Copy number cap (used by the fallback).</param>
/// <returns>True when the CNV is considered shared.</returns>
public static bool IsSharedCnv(ISampleMap<Genotype> copyNumberGenotypes, ISampleMap<CanvasSegment> canvasSegments,
    ISampleMap<SampleMetrics> samplesInfo, List<SampleId> parentIDs, SampleId probandId, int maximumCopyNumber)
{
    var probandGenotype = copyNumberGenotypes[probandId];
    var firstParentGenotype = copyNumberGenotypes[parentIDs.First()];
    var secondParentGenotype = copyNumberGenotypes[parentIDs.Last()];

    var probandPhased = probandGenotype.PhasedGenotype;
    if (probandPhased == null)
    {
        return IsSharedCnv(canvasSegments, samplesInfo, parentIDs, probandId, maximumCopyNumber);
    }

    var firstParentPhased = firstParentGenotype.PhasedGenotype;
    var secondParentPhased = secondParentGenotype.PhasedGenotype;

    if (probandPhased.ContainsSharedAlleleA(firstParentPhased) && probandPhased.ContainsSharedAlleleB(secondParentPhased))
    {
        return true;
    }

    return probandPhased.ContainsSharedAlleleA(secondParentPhased) && probandPhased.ContainsSharedAlleleB(firstParentPhased);
}
/// <summary>
/// Stores the called copy number and its single-sample quality score on every segment, replaces the
/// segment filter with "q{threshold}" when the score is below _qualityFilterThreshold, and, for a full
/// pedigree, goes on to set de novo quality scores.
/// </summary>
/// <param name="canvasSegments">One segment per sample; updated in place.</param>
/// <param name="pedigreeMembersInfo">Per-sample metrics forwarded to the de novo scoring.</param>
/// <param name="pedigreeInfo">Pedigree description (parents / offspring ids).</param>
/// <param name="singleSampleLikelihoods">Per-sample likelihoods keyed by genotype.</param>
/// <param name="copyNumberLikelihoods">Joint likelihoods forwarded to the de novo scoring.</param>
/// <param name="copyNumbers">Called genotype per sample.</param>
private void EstimateQScores(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> pedigreeMembersInfo,
    PedigreeInfo pedigreeInfo, ISampleMap<Dictionary<Genotype, double>> singleSampleLikelihoods,
    JointLikelihoods copyNumberLikelihoods, ISampleMap<Genotype> copyNumbers)
{
    foreach (var sampleId in canvasSegments.SampleIds)
    {
        var segment = canvasSegments[sampleId];
        segment.QScore = GetSingleSampleQualityScore(singleSampleLikelihoods[sampleId], copyNumbers[sampleId]);
        segment.CopyNumber = copyNumbers[sampleId].TotalCopyNumber;

        if (segment.QScore < _qualityFilterThreshold)
        {
            segment.Filter = CanvasFilter.Create(new[] { $"q{_qualityFilterThreshold}" });
        }
    }

    if (!pedigreeInfo.HasFullPedigree())
    {
        return;
    }

    SetDenovoQualityScores(canvasSegments, pedigreeMembersInfo, pedigreeInfo.ParentsIds, pedigreeInfo.OffspringIds,
        copyNumberLikelihoods);
}
/// <summary>
/// Builds a PedigreeInfo from the sample kinships. A pedigree is "full" when there is exactly one Father,
/// one Mother and one Proband. For a full pedigree, parents are Father/Mother, offspring are
/// Proband/Sibling and the rest are Other; for a partial pedigree, parents and offspring stay empty and
/// every sample is listed as Other.
/// </summary>
/// <param name="kinships">Sample type per sample.</param>
/// <param name="callerParameters">Supplies MaximumCopyNumber and the settings used to derive offspring genotypes.</param>
/// <returns>The populated PedigreeInfo.</returns>
public static PedigreeInfo GetPedigreeInfo(ISampleMap<SampleType> kinships, PedigreeCallerParameters callerParameters)
{
    var allSampleIds = kinships.SampleIds.ToReadOnlyList();

    int CountOf(SampleType sampleType) => kinships.Values.Count(kinship => kinship == sampleType);

    bool fullPedigree = CountOf(SampleType.Father) == 1 &&
                        CountOf(SampleType.Mother) == 1 &&
                        CountOf(SampleType.Proband) == 1;

    // do not populate parents and offspring fields for partial pedigrees
    List<SampleId> parentsIds;
    List<SampleId> offspringIds;
    List<SampleId> otherIds;
    if (fullPedigree)
    {
        parentsIds = kinships.WhereValues(value => value == SampleType.Father || value == SampleType.Mother).SampleIds.ToList();
        offspringIds = kinships.WhereValues(value => value == SampleType.Proband || value == SampleType.Sibling).SampleIds.ToList();
        otherIds = kinships.WhereValues(value => value == SampleType.Other).SampleIds.ToList();
    }
    else
    {
        parentsIds = new List<SampleId>();
        offspringIds = new List<SampleId>();
        otherIds = kinships.SampleIds.ToList();
    }

    var parentalPhasedGenotypes = GeneratePhasedGenotype(callerParameters.MaximumCopyNumber);
    var parentalTotalCopyNumberGenotypes = Enumerable.Range(0, callerParameters.MaximumCopyNumber)
        .Select(Genotype.Create)
        .ToList();
    var offspringPhasedGenotypes = GetOffspringGenotypes(callerParameters, parentalPhasedGenotypes, offspringIds);
    var offspringTotalCopyNumberGenotypes = GetOffspringGenotypes(callerParameters, parentalTotalCopyNumberGenotypes, offspringIds);
    var transitionMatrix = GetTransitionMatrix(callerParameters.MaximumCopyNumber);

    return new PedigreeInfo(allSampleIds, offspringIds, parentsIds, otherIds, offspringPhasedGenotypes,
        offspringTotalCopyNumberGenotypes, transitionMatrix);
}
/// <summary>
/// Assess likelihood of a de-novo variant for copyNumberGenotypes configuration with a Mendelian conflict.
/// The marginal gain / loss de novo likelihoods are queried at the reference ploidy of proband and
/// parents, turned into a probability and reported as -10 * log10 of it (floored at 1e-6, i.e. capped at 60).
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="jointLikelihoods">Joint likelihoods across the pedigree.</param>
/// <param name="samplesInfo">Per-sample metrics; used to obtain each sample's ploidy for its segment.</param>
/// <param name="parentIDs">Parent ids; the first and last entries are used.</param>
/// <param name="probandId">Id of the proband.</param>
/// <returns>Phred-scaled score derived from the conditional de novo probability.</returns>
internal static double GetConditionalDeNovoQualityScore(ISampleMap<CanvasSegment> canvasSegments, JointLikelihoods jointLikelihoods,
    ISampleMap<SampleMetrics> samplesInfo, List<SampleId> parentIDs, SampleId probandId)
{
    const double q60 = 0.000001;

    var parent1Ploidy = Genotype.Create(samplesInfo[parentIDs.First()].GetPloidy(canvasSegments[parentIDs.First()]));
    var parent2Ploidy = Genotype.Create(samplesInfo[parentIDs.Last()].GetPloidy(canvasSegments[parentIDs.Last()]));
    int probandPloidy = samplesInfo[probandId].GetPloidy(canvasSegments[probandId]);

    var probandReference = new KeyValuePair<SampleId, Genotype>(probandId, Genotype.Create(probandPloidy));
    var parent1Reference = new KeyValuePair<SampleId, Genotype>(parentIDs.First(), parent1Ploidy);
    var parent2Reference = new KeyValuePair<SampleId, Genotype>(parentIDs.Last(), parent2Ploidy);

    double deNovoGainMarginalLikelihood =
        jointLikelihoods.GetMarginalGainDeNovoLikelihood(probandReference, parent1Reference, parent2Reference);
    double deNovoLossMarginalLikelihood =
        jointLikelihoods.GetMarginalLossDeNovoLikelihood(probandReference, parent1Reference, parent2Reference);

    // A call above the proband's ploidy is scored as a gain, anything else as a loss.
    double denovoProbability;
    if (canvasSegments[probandId].CopyNumber > probandPloidy)
    {
        denovoProbability = 1 - deNovoGainMarginalLikelihood /
                            (jointLikelihoods.TotalMarginalLikelihood - deNovoLossMarginalLikelihood);
    }
    else
    {
        denovoProbability = 1 - deNovoLossMarginalLikelihood /
                            (jointLikelihoods.TotalMarginalLikelihood - deNovoGainMarginalLikelihood);
    }

    // likelihood of proband genotype != ALT given "copyNumberGenotypes" configuration in pedigree with Mendelian conflict
    return -10.0 * Math.Log10(Math.Max(denovoProbability, q60));
}
/// <summary>
/// Identify common variants using total CN calls within a pedigree obtained with coverage information only.
/// Copy numbers are capped at <paramref name="maximumCopyNumber"/> - 1 before being compared with each
/// sample's ploidy for its segment.
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="samplesInfo">Per-sample metrics; used to obtain each sample's ploidy for its segment.</param>
/// <param name="parentIDs">Parent ids; the first and last entries are used.</param>
/// <param name="probandId">Id of the proband.</param>
/// <param name="maximumCopyNumber">Copy numbers are capped at this value minus one.</param>
/// <returns>False when the configuration could be de novo (see below); true otherwise.</returns>
public static bool IsSharedCnv(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    List<SampleId> parentIDs, SampleId probandId, int maximumCopyNumber)
{
    int copyNumberCap = maximumCopyNumber - 1;

    var parent1Segment = canvasSegments[parentIDs.First()];
    var parent2Segment = canvasSegments[parentIDs.Last()];
    var probandSegment = canvasSegments[probandId];

    int parent1CopyNumber = Math.Min(parent1Segment.CopyNumber, copyNumberCap);
    int parent2CopyNumber = Math.Min(parent2Segment.CopyNumber, copyNumberCap);
    int probandCopyNumber = Math.Min(probandSegment.CopyNumber, copyNumberCap);

    int parent1Ploidy = samplesInfo[parentIDs.First()].GetPloidy(parent1Segment);
    int parent2Ploidy = samplesInfo[parentIDs.Last()].GetPloidy(parent2Segment);
    int probandPloidy = samplesInfo[probandId].GetPloidy(probandSegment);

    // Proband has more copies than expected while neither parent exceeds its expected number.
    bool possibleDeNovoGain = parent1CopyNumber <= parent1Ploidy &&
                              parent2CopyNumber <= parent2Ploidy &&
                              probandCopyNumber > probandPloidy;

    // Proband has fewer copies than expected while both parents have at least the expected number.
    bool possibleDeNovoLoss = parent1CopyNumber >= parent1Ploidy &&
                              parent2CopyNumber >= parent2Ploidy &&
                              probandCopyNumber < probandPloidy;

    // Either pattern means it is not a 'common CNV' (i.e. it could be de novo); otherwise, it is common.
    return !(possibleDeNovoGain || possibleDeNovoLoss);
}
/// <summary>
/// Computes, for every sample, the likelihood of each total copy number in [0, _maximumCopyNumber)
/// given the segment coverage. Coverage is the truncated median bin count capped at three times the
/// sample's mean coverage. Plain likelihoods are returned; NaN or infinite values are replaced by 0.
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="samplesInfo">Per-sample metrics; MeanCoverage is used for the coverage cap.</param>
/// <param name="copyNumberModel">Per-sample model that evaluates the likelihood and reports its supported bounds.</param>
/// <param name="numberOfTrimmedBins">Forwarded to TruncatedMedianCount.</param>
/// <returns>Per-sample dictionary from copy number genotype to likelihood.</returns>
/// <exception cref="ArgumentException">The rounded coverage reaches the model's coverage bound, or the
/// copy number exceeds the model's maximum copy number.</exception>
public ISampleMap<Dictionary<Genotype, double>> GetCopyNumbersLikelihoods(ISampleMap<CanvasSegment> canvasSegments,
    ISampleMap<SampleMetrics> samplesInfo, ISampleMap<ICopyNumberModel> copyNumberModel, int numberOfTrimmedBins)
{
    const double maxCoverageMultiplier = 3.0;

    var genotypes = Enumerable.Range(0, _maximumCopyNumber).Select(Genotype.Create).ToList();
    var singleSampleLikelihoods = new SampleMap<Dictionary<Genotype, double>>();

    foreach (var sampleId in canvasSegments.SampleIds)
    {
        var density = new Dictionary<Genotype, double>();

        foreach (var genotypeCopyNumber in genotypes)
        {
            double truncatedDepth = canvasSegments[sampleId].TruncatedMedianCount(numberOfTrimmedBins);
            double meanTimesThree = samplesInfo[sampleId].MeanCoverage * maxCoverageMultiplier;
            double cvg = Math.Min(truncatedDepth, meanTimesThree);

            // In case we run into out-of-range trouble again (CANV-694), print details
            int intcvg = Convert.ToInt32(cvg);
            int coverageBound = copyNumberModel[sampleId].GetCoverageBound();
            int maxAllowedCN = copyNumberModel[sampleId].GetMaxCopyNumber();
            bool depthOutOfRange = intcvg >= coverageBound;
            if (depthOutOfRange || genotypeCopyNumber.TotalCopyNumber > maxAllowedCN)
            {
                throw new ArgumentException(
                    $"Tried to look up bad depth or CN for {sampleId}: depth {intcvg} CN {genotypeCopyNumber.TotalCopyNumber}" +
                    $" where max handled values are {coverageBound} and {maxAllowedCN} respectively;" +
                    $" original depth was {truncatedDepth}, mean * 3 was {meanTimesThree};" +
                    $" segment {canvasSegments[sampleId].Chr}:{canvasSegments[sampleId].Begin}-{canvasSegments[sampleId].End}");
            }

            double currentLikelihood = copyNumberModel[sampleId].GetTotalCopyNumberLikelihoods(cvg, genotypeCopyNumber);
            if (Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood))
            {
                currentLikelihood = 0;
            }

            density[genotypeCopyNumber] = currentLikelihood;
        }

        singleSampleLikelihoods.Add(sampleId, density);
    }

    return singleSampleLikelihoods;
}
/// <summary>
/// Perform de-novo CNV calling in two steps:
/// 1. Filter REF variants and common CNVs, this step relies only on total CN calls with associated shortcomings
/// 2. Assign de-novo quality based on joint likelihood across pedigree using marginalisation operations
/// A score is assigned to an offspring only if it is non-reference, the CNV is not shared with the parents,
/// all other offspring are reference, and both parents and the offspring pass the quality threshold.
/// </summary>
/// <param name="canvasSegments">One segment per sample; DqScore of qualifying offspring is set in place.</param>
/// <param name="samplesInfo">Per-sample metrics.</param>
/// <param name="parentIDs">Parent ids.</param>
/// <param name="offspringIDs">Offspring ids; each is evaluated in turn as the proband.</param>
/// <param name="copyNumbersLikelihoods">Joint likelihoods across the pedigree.</param>
private void SetDenovoQualityScores(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    List<SampleId> parentIDs, List<SampleId> offspringIDs, JointLikelihoods copyNumbersLikelihoods)
{
    foreach (var probandId in offspringIDs)
    {
        // targeted proband is REF
        if (IsReferenceVariant(canvasSegments, samplesInfo, probandId))
        {
            continue;
        }

        // common variant
        if (CanvasPedigreeCaller.IsSharedCnv(canvasSegments, samplesInfo, parentIDs, probandId, _callerParameters.MaximumCopyNumber))
        {
            continue;
        }

        // other offsprings are ALT
        if (!offspringIDs.Except(probandId.ToEnumerable()).All(id => IsReferenceVariant(canvasSegments, samplesInfo, id)))
        {
            continue;
        }

        // not all q-scores are above the threshold
        if (parentIDs.Concat(probandId).Any(id => !IsPassVariant(canvasSegments, id)))
        {
            continue;
        }

        double deNovoQualityScore = CanvasPedigreeCaller.GetConditionalDeNovoQualityScore(canvasSegments, copyNumbersLikelihoods,
            samplesInfo, parentIDs, probandId);

        // adjustment so that denovo quality score threshold is 20 (rather than 10) to match Manta
        deNovoQualityScore *= 2;

        // Clamp to the maximum reportable score (short-circuit OR; both operands are side-effect free).
        // NOTE(review): a NaN score passes through this clamp unchanged - confirm whether that can occur upstream.
        if (Double.IsInfinity(deNovoQualityScore) || deNovoQualityScore > _callerParameters.MaxQscore)
        {
            deNovoQualityScore = _callerParameters.MaxQscore;
        }

        canvasSegments[probandId].DqScore = deNovoQualityScore;
    }
}
/// <summary>
/// Calls copy numbers for one segment across all samples: computes single-sample likelihoods, derives
/// pedigree and non-pedigree copy numbers, stores copy numbers and quality scores on the segments and,
/// when enough allele information is available, assigns MajorChromosomeCount.
/// </summary>
/// <param name="canvasSegments">One segment per sample; updated in place.</param>
/// <param name="samplesInfo">Per-sample metrics.</param>
/// <param name="copyNumberModel">Per-sample copy number models.</param>
/// <param name="pedigreeInfo">Pedigree description.</param>
public void CallVariant(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    ISampleMap<ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
{
    var singleSampleLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo,
        copyNumberModel, _callerParameters.NumberOfTrimmedBins);

    var (pedigreeCopyNumbers, pedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);
    var nonPedigreeCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, singleSampleLikelihoods);
    var mergedCopyNumbers = pedigreeCopyNumbers.Concat(nonPedigreeCopyNumbers).OrderBy(canvasSegments.SampleIds);

    EstimateQScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods, pedigreeLikelihoods, mergedCopyNumbers);

    // Evaluated separately for each of the two MCC steps below.
    bool HasSufficientAlleleCounts()
    {
        return CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments, _callerParameters.MinAlleleCountsThreshold,
            _callerParameters.MinAlleleNumberInSegment);
    }

    // TODO: this will be integrated with GetCopyNumbers* on a model level as a part of https://jira.illumina.com/browse/CANV-404
    if (HasSufficientAlleleCounts() && pedigreeInfo.HasFullPedigree())
    {
        AssignMccWithPedigreeInfo(canvasSegments, copyNumberModel, pedigreeInfo);
    }

    if (HasSufficientAlleleCounts() && pedigreeInfo.HasOther())
    {
        var otherSegments = canvasSegments.Where(segment => pedigreeInfo.OtherIds.Contains(segment.SampleId)).ToSampleMap();
        AssignMccNoPedigreeInfo(otherSegments, copyNumberModel, _genotypes);
    }
}
/// <summary>
/// Calls copy numbers for one segment across all samples. Coverage likelihoods are always computed; when
/// the segment has enough allele information they are merged with allele-based genotype log likelihoods,
/// otherwise they are only converted to log likelihoods. The result drives pedigree and non-pedigree
/// copy number calling, after which copy numbers and scores are assigned to the segments.
/// </summary>
/// <param name="canvasSegments">One segment per sample; updated in place.</param>
/// <param name="samplesInfo">Per-sample metrics.</param>
/// <param name="copyNumberModel">Per-sample copy number models.</param>
/// <param name="pedigreeInfo">Pedigree description.</param>
public void CallVariant(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    ISampleMap<ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
{
    var coverageLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo, copyNumberModel);

    // if number and properties of SNPs in the segment are above threshold, calculate likelihood from SNPs and merge with
    // coverage likelihood to form merged likelihoods
    bool useAlleleCounts = CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments,
        _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment);

    // If allele information is available (i.e. segment has enough SNPs) merge coverage and allele likelihood obtained by
    // GetGenotypeLogLikelihoods into singleSampleLikelihoods using JoinLikelihoods function.
    // The b-allele count of the first sample is read only on this branch: UseAlleleCountsInformation tolerates
    // segments without Balleles, so dereferencing Balleles up front could fail on the coverage-only path.
    var singleSampleLikelihoods = useAlleleCounts
        ? JoinLikelihoods(GetGenotypeLogLikelihoods(canvasSegments, copyNumberModel, _PhasedGenotypes), coverageLikelihoods,
            canvasSegments.Values.First().Balleles.Size())
        : ConvertToLogLikelihood(coverageLikelihoods);

    // estimate joint likelihood across pedigree samples from singleSampleLikelihoods using either only coverage or
    // coverage + allele counts
    (var pedigreeCopyNumbers, var pedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);
    var nonPedigreeCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, singleSampleLikelihoods);
    var mergedCopyNumbers = nonPedigreeCopyNumbers.Concat(pedigreeCopyNumbers).OrderBy(canvasSegments.SampleIds);

    AssignCNandScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods, pedigreeLikelihoods, mergedCopyNumbers);
}
/// <summary>
/// Returns the sample's copy number, capped at <paramref name="maximumCopyNumber"/> - 1.
/// </summary>
/// <param name="canvasSegmentsSet">One segment per sample.</param>
/// <param name="sampleId">Sample whose copy number is read.</param>
/// <param name="maximumCopyNumber">The returned state never exceeds this value minus one.</param>
/// <returns>The capped copy number.</returns>
private static int GetCnState(ISampleMap<CanvasSegment> canvasSegmentsSet, SampleId sampleId, int maximumCopyNumber)
{
    int copyNumber = canvasSegmentsSet[sampleId].CopyNumber;
    int highestState = maximumCopyNumber - 1;
    return copyNumber < highestState ? copyNumber : highestState;
}
/// <summary>
/// Compares two sample-to-genotype maps element by element, in enumeration order.
/// Two null references (or the same instance) are equal; a null and a non-null map are not.
/// </summary>
/// <param name="x">First map, may be null.</param>
/// <param name="y">Second map, may be null.</param>
/// <returns>True when both maps enumerate equal elements in the same order.</returns>
public bool Equals(ISampleMap<Genotype> x, ISampleMap<Genotype> y)
{
    if (ReferenceEquals(x, y))
    {
        return true;
    }

    // SequenceEqual throws on null arguments; an equality comparer should answer instead.
    if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
    {
        return false;
    }

    return x.SequenceEqual(y);
}
/// <summary>
/// For every overlapping region, selects the segmentation alternative with the highest likelihood (in
/// parallel, preserving input order) and returns the selected segments of all regions as one flat list
/// of per-sample segment maps.
/// </summary>
/// <param name="segmentSetsFromCommonCnvs">Overlapping regions, one sample map per region; the selection is stored on them in place.</param>
/// <param name="pedigreeMembersInfo">Per-sample metrics forwarded to the likelihood calculation.</param>
/// <param name="copyNumberModel">Per-sample copy number models forwarded to the likelihood calculation.</param>
/// <returns>Materialised list of the selected segments, zipped across samples.</returns>
private IEnumerable<ISampleMap<CanvasSegment>> GetHighestLikelihoodSegments(
    IEnumerable<ISampleMap<OverlappingSegmentsRegion>> segmentSetsFromCommonCnvs, ISampleMap<SampleMetrics> pedigreeMembersInfo,
    ISampleMap<ICopyNumberModel> copyNumberModel)
{
    // Never use more workers than the configured maximum or the available processors.
    var degreeOfParallelism = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber);

    var resolvedRegions = segmentSetsFromCommonCnvs
        .AsParallel()
        .AsOrdered()
        .WithDegreeOfParallelism(degreeOfParallelism)
        .Select(regionBySample =>
        {
            GetHighestLogLikelihoodSegmentsSet(regionBySample, pedigreeMembersInfo, copyNumberModel);
            return regionBySample;
        });

    var selectedSegments = resolvedRegions
        .SelectMany(regionBySample => regionBySample.SelectValues(region => region.GetSet().AsEnumerable()).Zip())
        .ToList();

    return selectedSegments;
}
/// <summary>
/// Turns per-sample lists of overlapping regions into a sequence of per-region sample maps by zipping
/// the lists across samples.
/// </summary>
/// <param name="sampleRegions">Overlapping regions per sample.</param>
/// <returns>The result of zipping <paramref name="sampleRegions"/> across samples.</returns>
private static IEnumerable<ISampleMap<OverlappingSegmentsRegion>> GetOverlappingSegmentsRegionSampleLists(
    ISampleMap<List<OverlappingSegmentsRegion>> sampleRegions)
    => sampleRegions.Zip();
/// <summary>
/// Estimate joint likelihood and most likely CN assignment within a pedigree using total CN Genotype likelihoods and transition matrix
/// </summary>
/// <param name="pedigreeInfo">Pedigree description: parent / offspring ids, candidate offspring phased genotypes and transition matrix.</param>
/// <param name="copyNumbersLikelihoods">Per-sample likelihood of each total copy number genotype.</param>
/// <returns>
/// The genotype assignment with the highest joint likelihood and the accumulated joint likelihoods.
/// For a pedigree that is not full, an empty assignment and a fresh JointLikelihoods are returned.
/// </returns>
/// <exception cref="IlluminaException">No genotype combination raised the maximal log likelihood, so no assignment was selected.</exception>
private (ISampleMap<Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetPedigreeCopyNumbers(PedigreeInfo pedigreeInfo,
    ISampleMap<Dictionary<Genotype, double>> copyNumbersLikelihoods)
{
    // With two or more offspring only the three most likely genotypes per sample are explored.
    int nHighestLikelihoodGenotypes = pedigreeInfo != null && pedigreeInfo.OffspringIds.Count >= 2
        ? 3
        : _callerParameters.MaximumCopyNumber;
    copyNumbersLikelihoods = copyNumbersLikelihoods.SelectValues(l =>
        l.OrderByDescending(kvp => kvp.Value).Take(nHighestLikelihoodGenotypes).ToDictionary());

    var sampleCopyNumbersGenotypes = new SampleMap<Genotype>();
    var jointLikelihood = new JointLikelihoods();
    if (!pedigreeInfo.HasFullPedigree())
    {
        return (sampleCopyNumbersGenotypes, jointLikelihood);
    }

    var parent1Id = pedigreeInfo.ParentsIds.First();
    var parent2Id = pedigreeInfo.ParentsIds.Last();
    var offspringIds = pedigreeInfo.OffspringIds;
    int copyNumberCap = _callerParameters.MaximumCopyNumber - 1;

    // parent 1 total CNs and likelihoods
    foreach (var copyNumberParent1 in copyNumbersLikelihoods[parent1Id])
    {
        // parent 2 total CNs and likelihoods
        foreach (var copyNumberParent2 in copyNumbersLikelihoods[parent2Id])
        {
            // for offspring in addition to querying likelihoods using total CNs, iterate over all possible genotype
            // combination (CopyNumberA/B) for a given CN and estimate likely transition probabilities using TransitionMatrix
            foreach (var offspringGtStates in pedigreeInfo.OffspringPhasedGenotypes)
            {
                // Derive each child's (capped) total copy number once, by position, and stop at the first child
                // whose total copy number has no likelihood entry.
                var totalCopyNumberGenotypes = new List<Genotype>(offspringIds.Count);
                bool unavailableTotalCopyNumber = false;
                for (var childIndex = 0; childIndex < offspringIds.Count; childIndex++)
                {
                    var phasedGenotype = offspringGtStates[childIndex].PhasedGenotype;
                    var childTotalCopyNumber = Genotype.Create(
                        Math.Min(phasedGenotype.CopyNumberA + phasedGenotype.CopyNumberB, copyNumberCap));
                    if (!copyNumbersLikelihoods[offspringIds[childIndex]].ContainsKey(childTotalCopyNumber))
                    {
                        unavailableTotalCopyNumber = true;
                        break;
                    }

                    totalCopyNumberGenotypes.Add(childTotalCopyNumber);
                }

                if (unavailableTotalCopyNumber)
                {
                    // unavailable total CN
                    continue;
                }

                // For a given combination of offspring copy numbers, only the genotypes that result in the maximum
                // likelihood contribute to the final result.
                double currentLikelihood = copyNumberParent1.Value * copyNumberParent2.Value;
                for (var childIndex = 0; childIndex < offspringIds.Count; childIndex++)
                {
                    var phasedGenotype = offspringGtStates[childIndex].PhasedGenotype;
                    currentLikelihood *=
                        pedigreeInfo.TransitionMatrix[copyNumberParent1.Key.TotalCopyNumber][phasedGenotype.CopyNumberA] *
                        pedigreeInfo.TransitionMatrix[copyNumberParent2.Key.TotalCopyNumber][phasedGenotype.CopyNumberB] *
                        copyNumbersLikelihoods[offspringIds[childIndex]][totalCopyNumberGenotypes[childIndex]];
                }

                currentLikelihood = Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood)
                    ? 0
                    : currentLikelihood;

                var genotypesInPedigree = new SampleMap<Genotype>
                {
                    { parent1Id, copyNumberParent1.Key },
                    { parent2Id, copyNumberParent2.Key }
                };
                for (var childIndex = 0; childIndex < offspringIds.Count; childIndex++)
                {
                    genotypesInPedigree.Add(offspringIds[childIndex], totalCopyNumberGenotypes[childIndex]);
                }

                genotypesInPedigree = genotypesInPedigree.OrderBy(pedigreeInfo.AllSampleIds);
                jointLikelihood.AddJointLikelihood(genotypesInPedigree, currentLikelihood);

                double currentLogLikelihood = Math.Log(currentLikelihood);
                if (currentLogLikelihood > jointLikelihood.MaximalLogLikelihood)
                {
                    jointLikelihood.MaximalLogLikelihood = currentLogLikelihood;
                    sampleCopyNumbersGenotypes = genotypesInPedigree;
                }
            }
        }
    }

    if (sampleCopyNumbersGenotypes.Empty())
    {
        throw new IlluminaException("Maximal likelihood was not found");
    }

    return (sampleCopyNumbersGenotypes, jointLikelihood);
}
/// <summary>
/// Picks, independently for every sample, the genotype with the highest single-sample likelihood and
/// adds the log of that likelihood to the MaximalLogLikelihood of a new JointLikelihoods object.
/// </summary>
/// <param name="segments">One segment per sample; only the sample ids are used.</param>
/// <param name="singleSampleLikelihoods">Per-sample likelihoods keyed by genotype.</param>
/// <returns>The selected genotype per sample and the JointLikelihoods carrying the summed log likelihood.</returns>
public static (SampleMap<Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetCopyNumbersNoPedigreeInfo(
    ISampleMap<CanvasSegment> segments, ISampleMap<Dictionary<Genotype, double>> singleSampleLikelihoods)
{
    // for non-pedigree samples JointLogLikelihoods object contains only maximum likelihood information
    // NOTE(review): the sum starts from whatever MaximalLogLikelihood a new JointLikelihoods holds - confirm that initial value.
    var maximumLikelihoodOnly = new JointLikelihoods();
    var bestGenotypeBySample = new SampleMap<Genotype>();

    foreach (var sampleId in segments.SampleIds)
    {
        var (bestGenotype, bestLikelihood) = singleSampleLikelihoods[sampleId].MaxBy(x => x.Value);
        maximumLikelihoodOnly.MaximalLogLikelihood += Math.Log(bestLikelihood);
        bestGenotypeBySample.Add(sampleId, bestGenotype);
    }

    return (bestGenotypeBySample, maximumLikelihoodOnly);
}
/// <summary>
/// Tells whether the sample's segment has a QScore of at least _qualityFilterThreshold.
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="sampleId">Sample to check.</param>
/// <returns>True when the segment's QScore reaches the threshold.</returns>
private bool IsPassVariant(ISampleMap<CanvasSegment> canvasSegments, SampleId sampleId)
    => canvasSegments[sampleId].QScore >= _qualityFilterThreshold;
/// <summary>
/// Tells whether the sample's (capped) copy number state equals its ploidy for the segment.
/// </summary>
/// <param name="canvasSegments">One segment per sample.</param>
/// <param name="samplesInfo">Per-sample metrics; used to obtain the ploidy for the segment.</param>
/// <param name="sampleId">Sample to check.</param>
/// <returns>True when copy number state and ploidy are equal.</returns>
private bool IsReferenceVariant(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo, SampleId sampleId)
{
    var sampleSegment = canvasSegments[sampleId];
    var copyNumberState = GetCnState(canvasSegments, sampleId, _callerParameters.MaximumCopyNumber);
    var expectedPloidy = samplesInfo[sampleId].GetPloidy(sampleSegment);
    return copyNumberState == expectedPloidy;
}
/// <summary>
/// Hash code for a sample-to-genotype map: 17 plus 31 times the hash code of each element.
/// The result does not depend on element order, so maps that are equal element by element hash equally.
/// </summary>
/// <param name="obj">Map to hash.</param>
/// <returns>The combined hash code.</returns>
public int GetHashCode(ISampleMap<Genotype> obj)
{
    // Hash arithmetic is expected to wrap around; make that explicit so a build with overflow checking
    // enabled cannot make this throw OverflowException.
    return obj.Aggregate(17, (hash, value) => unchecked(hash + value.GetHashCode() * 31));
}
/// <summary>
/// Builds the overlapping segment regions per sample. Without a common CNVs bed file every segment
/// becomes its own region. Otherwise the common regions are read, remapped to bin coordinates per
/// chromosome, and combined with each sample's segments and allele frequencies into segment sets.
/// </summary>
/// <param name="variantFrequencyFiles">Per-sample path of the variant frequency file.</param>
/// <param name="defaultAlleleCountThreshold">Forwarded to GetSegmentSets.</param>
/// <param name="commonCNVsbedPath">Path of the common CNVs bed file, or null to skip common CNVs.</param>
/// <param name="sampleSegments">Per-sample segments; the first sample supplies chromosomes and genomic bins.</param>
/// <returns>One sample map of overlapping regions per region.</returns>
/// <exception cref="ArgumentException">IsIdenticalChromosomeNames returned true for the bed file and the reference chromosomes.</exception>
private IEnumerable<ISampleMap<OverlappingSegmentsRegion>> CreateSegmentSetsFromCommonCnvs(ISampleMap<string> variantFrequencyFiles,
    int defaultAlleleCountThreshold, string commonCNVsbedPath, ISampleMap<Segments> sampleSegments)
{
    if (commonCNVsbedPath == null)
    {
        var defaultSampleRegions = sampleSegments
            .SelectValues(segments => segments.AllSegments.Select(segment => new OverlappingSegmentsRegion(segment)).ToList());
        return GetOverlappingSegmentsRegionSampleLists(defaultSampleRegions);
    }

    var commonRegions = ReadCommonRegions(commonCNVsbedPath);
    var chromosomes = sampleSegments.Values.First().GetChromosomes();

    // NOTE(review): the exception is thrown when IsIdenticalChromosomeNames returns TRUE, which reads as inverted
    // relative to the message; the helper is not visible here - confirm its actual semantics before changing this.
    if (IsIdenticalChromosomeNames(commonRegions, chromosomes))
    {
        throw new ArgumentException(
            $"Chromosome names in a common CNVs bed file {commonCNVsbedPath} does not match the genome reference");
    }

    var segmentIntervalsByChromosome = new Dictionary<string, List<BedInterval>>();
    var genomicBinsByChromosome = new Dictionary<string, IReadOnlyList<SampleGenomicBin>>();

    // Dictionary<TKey, TValue> does not support concurrent writers, so the per-chromosome work runs in parallel
    // on local variables and only the two dictionary insertions are serialised.
    var dictionaryGate = new object();
    Parallel.ForEach(
        chromosomes,
        chr =>
        {
            IReadOnlyList<SampleGenomicBin> genomicBins = sampleSegments.Values.First().GetGenomicBinsForChromosome(chr);
            List<BedInterval> segmentIntervals = CanvasSegment.RemapGenomicToBinCoordinates(commonRegions[chr], genomicBins);

            lock (dictionaryGate)
            {
                genomicBinsByChromosome[chr] = genomicBins;
                segmentIntervalsByChromosome[chr] = segmentIntervals;
            }
        });

    var sampleRegions = new SampleMap<List<OverlappingSegmentsRegion>>();
    foreach (var sampleId in sampleSegments.SampleIds)
    {
        // Rebuilt for every sample, as the frequency reader receives its own copy of the interval lists.
        var commonIntervals = commonRegions.ToDictionary(kvp => kvp.Key,
            kvp => kvp.Value.Select(bedEntry => bedEntry.Interval).ToList());
        var allelesByChromosomeCommonSegs = CanvasIO.ReadFrequenciesWrapper(_logger,
            new FileLocation(variantFrequencyFiles[sampleId]), commonIntervals);
        var segmentsSets = GetSegmentSets(defaultAlleleCountThreshold, commonRegions, genomicBinsByChromosome,
            segmentIntervalsByChromosome, allelesByChromosomeCommonSegs, sampleSegments[sampleId]);
        sampleRegions.Add(sampleId, segmentsSets);
    }

    return GetOverlappingSegmentsRegionSampleLists(sampleRegions);
}