/// <summary>
/// Assigns a quality score and total copy number to every sample's segment, flags segments whose
/// quality score falls below the quality filter threshold, and, when a full pedigree is present,
/// delegates to <c>SetDenovoQualityScores</c>.
/// </summary>
/// <param name="canvasSegments">Segment of each sample; updated in place.</param>
/// <param name="pedigreeMembersInfo">Per-sample metrics, forwarded to <c>SetDenovoQualityScores</c>.</param>
/// <param name="pedigreeInfo">Pedigree description providing parent and offspring ids.</param>
/// <param name="singleSampleLikelihoods">Per-sample genotype likelihoods used for the single-sample quality score.</param>
/// <param name="copyNumberLikelihoods">Joint likelihoods, forwarded to <c>SetDenovoQualityScores</c>.</param>
/// <param name="copyNumbers">Called genotype of each sample.</param>
private void EstimateQScores(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> pedigreeMembersInfo,
    PedigreeInfo pedigreeInfo, ISampleMap<Dictionary<Genotype, double>> singleSampleLikelihoods,
    JointLikelihoods copyNumberLikelihoods, ISampleMap<Genotype> copyNumbers)
{
    foreach (var sampleId in canvasSegments.SampleIds)
    {
        var sampleSegment = canvasSegments[sampleId];
        var calledGenotype = copyNumbers[sampleId];

        sampleSegment.QScore = GetSingleSampleQualityScore(singleSampleLikelihoods[sampleId], calledGenotype);
        sampleSegment.CopyNumber = calledGenotype.TotalCopyNumber;

        bool passesQualityFilter = !(sampleSegment.QScore < _qualityFilterThreshold);
        if (passesQualityFilter)
        {
            continue;
        }

        // Low-quality call: tag the segment with the "q<threshold>" filter
        sampleSegment.Filter = CanvasFilter.Create(new[] { $"q{_qualityFilterThreshold}" });
    }

    // De novo scores are only computed when the pedigree reports itself as complete
    if (!pedigreeInfo.HasFullPedigree())
    {
        return;
    }

    SetDenovoQualityScores(canvasSegments, pedigreeMembersInfo, pedigreeInfo.ParentsIds, pedigreeInfo.OffspringIds,
        copyNumberLikelihoods);
}
/// <summary>
/// Identify variant with the highest likelihood at a given setPosition and assign relevant scores
/// </summary>
/// <param name="canvasSegments">Segment of each sample at the position being called; updated in place.</param>
/// <param name="samplesInfo">Per-sample metrics.</param>
/// <param name="copyNumberModel">Per-sample copy number model.</param>
/// <param name="pedigreeInfo">Pedigree description (pedigree members and "other" samples).</param>
public void CallVariant(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    ISampleMap<ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
{
    // Evaluated separately before each MCC assignment step, exactly as the two inline checks it replaces
    bool HasEnoughAlleleCounts()
    {
        return CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments,
            _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment);
    }

    var singleSampleLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments,
        samplesInfo, copyNumberModel, _callerParameters.NumberOfTrimmedBins);

    // Copy numbers for pedigree members (joint call) and for the remaining samples (independent calls)
    var (pedigreeCopyNumbers, pedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);
    var nonPedigreeCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo,
        singleSampleLikelihoods);
    var mergedCopyNumbers = pedigreeCopyNumbers
        .Concat(nonPedigreeCopyNumbers)
        .OrderBy(canvasSegments.SampleIds);

    EstimateQScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods, pedigreeLikelihoods,
        mergedCopyNumbers);

    // TODO: this will be integrated with GetCopyNumbers* on a model level as a part of https://jira.illumina.com/browse/CANV-404
    if (HasEnoughAlleleCounts() && pedigreeInfo.HasFullPedigree())
    {
        AssignMccWithPedigreeInfo(canvasSegments, copyNumberModel, pedigreeInfo);
    }

    if (HasEnoughAlleleCounts() && pedigreeInfo.HasOther())
    {
        var otherSampleSegments = canvasSegments
            .Where(segment => pedigreeInfo.OtherIds.Contains(segment.SampleId))
            .ToSampleMap();
        AssignMccNoPedigreeInfo(otherSampleSegments, copyNumberModel, _genotypes);
    }
}
/// <summary>
/// Calls copy numbers for one set of per-sample segments: builds single-sample likelihoods from coverage
/// (merged with allele-count likelihoods when <c>UseAlleleCountsInformation</c> allows it), derives pedigree
/// and non-pedigree copy numbers from them, and passes the result to <c>AssignCNandScores</c>.
/// </summary>
/// <param name="canvasSegments">Segment of each sample at the position being called.</param>
/// <param name="samplesInfo">Per-sample metrics.</param>
/// <param name="copyNumberModel">Per-sample copy number model.</param>
/// <param name="pedigreeInfo">Pedigree description (pedigree members and "other" samples).</param>
public void CallVariant(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    ISampleMap<ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
{
    var coverageLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo,
        copyNumberModel);

    // Number of B-alleles is taken from the first sample's segment
    int nBalleles = canvasSegments.Values.First().Balleles.Size();

    // If allele information is available (i.e. the segment has enough SNPs), the allele likelihoods from
    // GetGenotypeLogLikelihoods are merged with the coverage likelihoods by JoinLikelihoods; otherwise the
    // coverage likelihoods alone are converted to log-likelihoods.
    bool useAlleleCounts = CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments,
        _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment);
    var singleSampleLikelihoods = useAlleleCounts
        ? JoinLikelihoods(GetGenotypeLogLikelihoods(canvasSegments, copyNumberModel, _PhasedGenotypes),
            coverageLikelihoods, nBalleles)
        : ConvertToLogLikelihood(coverageLikelihoods);

    // Joint likelihood across pedigree samples, from either coverage only or coverage + allele counts
    var (pedigreeCopyNumbers, pedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);
    var nonPedigreeCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo,
        singleSampleLikelihoods);
    var mergedCopyNumbers = nonPedigreeCopyNumbers
        .Concat(pedigreeCopyNumbers)
        .OrderBy(canvasSegments.SampleIds);

    AssignCNandScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods, pedigreeLikelihoods,
        mergedCopyNumbers);
}
/// <summary>
/// Runs variant calling for a set of samples: reads each sample's segments and allele frequencies, builds
/// per-sample metrics and copy number models, calls variants on every segment set in parallel, merges and
/// filters the called segments, and writes the multi-sample VCF together with per-sample VCF, coverage,
/// bigWig and bedGraph outputs.
/// </summary>
/// <param name="variantFrequencyFiles">Allele frequency file of each sample, indexed like <paramref name="sampleNames"/>.</param>
/// <param name="segmentFiles">Segment file of each sample, indexed like <paramref name="sampleNames"/>.</param>
/// <param name="outVcfFile">Multi-sample VCF output; its directory receives the per-sample outputs.</param>
/// <param name="ploidyBedPath">Ploidy BED path passed to <c>SampleMetrics.GetSampleInfo</c>.</param>
/// <param name="referenceFolder">Reference folder passed to the writers.</param>
/// <param name="sampleNames">Sample names; one <c>SampleId</c> is created per entry.</param>
/// <param name="commonCnvsBedPath">Common CNVs BED path passed to <c>CreateSegmentSetsFromCommonCnvs</c>.</param>
/// <param name="sampleTypes">Kinship/sample type of each sample, indexed like <paramref name="sampleNames"/>.</param>
/// <returns>Always 0.</returns>
internal int CallVariants(List<string> variantFrequencyFiles, List<string> segmentFiles, IFileLocation outVcfFile,
    string ploidyBedPath, string referenceFolder, List<string> sampleNames, string commonCnvsBedPath,
    List<SampleType> sampleTypes)
{
    const string partitionBedgraphHeader = "track type=bedGraph visibility=full autoScale=on graphType=points";

    // load files
    // initialize data structures and classes
    var samplesInfo = new SampleMap<SampleMetrics>();
    var sampleSegments = new SampleMap<Segments>();
    var copyNumberModels = new SampleMap<ICopyNumberModel>();
    var variantFrequencyFilesSampleList = new SampleMap<string>();
    var kinships = new SampleMap<SampleType>();

    for (int sampleIndex = 0; sampleIndex < sampleNames.Count; sampleIndex++)
    {
        var sampleId = new SampleId(sampleNames[sampleIndex]);

        var segment = Segments.ReadSegments(_logger, new FileLocation(segmentFiles[sampleIndex]));
        var alleleFrequencies = CanvasIO.ReadFrequenciesWrapper(_logger,
            new FileLocation(variantFrequencyFiles[sampleIndex]), segment.IntervalsByChromosome);
        segment.AddAlleles(alleleFrequencies);
        sampleSegments.Add(sampleId, segment);

        var sampleInfo = SampleMetrics.GetSampleInfo(segment.AllSegments, ploidyBedPath,
            _callerParameters.NumberOfTrimmedBins, sampleId);
        var copyNumberModel = _copyNumberModelFactory.CreateModel(_callerParameters.MaximumCopyNumber,
            sampleInfo.MaxCoverage, sampleInfo.MeanCoverage, sampleInfo.MeanMafCoverage);

        samplesInfo.Add(sampleId, sampleInfo);
        copyNumberModels.Add(sampleId, copyNumberModel);
        variantFrequencyFilesSampleList.Add(sampleId, variantFrequencyFiles[sampleIndex]);
        kinships.Add(sampleId, sampleTypes[sampleIndex]);
    }

    var segmentSetsFromCommonCnvs = CreateSegmentSetsFromCommonCnvs(variantFrequencyFilesSampleList,
        _callerParameters.MinAlleleCountsThreshold, commonCnvsBedPath, sampleSegments);
    var segmentsForVariantCalling =
        GetHighestLikelihoodSegments(segmentSetsFromCommonCnvs, samplesInfo, copyNumberModels).ToList();
    PedigreeInfo pedigreeInfo = PedigreeInfo.GetPedigreeInfo(kinships, _callerParameters);

    // Call every segment set in parallel, capped by both the machine's core count and the configured maximum
    var parallelOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber)
    };
    Parallel.ForEach(
        segmentsForVariantCalling,
        parallelOptions,
        segmentSet => _variantCaller.CallVariant(segmentSet, samplesInfo, copyNumberModels, pedigreeInfo));

    // Regroup the called segments per sample
    var variantCalledSegments = new SampleMap<List<CanvasSegment>>();
    foreach (var sampleId in samplesInfo.SampleIds)
    {
        var segmentsOfSample = segmentsForVariantCalling.Select(segmentSet => segmentSet[sampleId]).ToList();
        variantCalledSegments.Add(sampleId, segmentsOfSample);
    }

    var mergedVariantCalledSegments = MergeSegments(variantCalledSegments, _callerParameters.MinimumCallSize,
        _qualityFilterThreshold);
    FilterExcessivelyShortSegments(mergedVariantCalledSegments);

    var outputFolder = outVcfFile.Directory;
    foreach (var sampleId in samplesInfo.SampleIds)
    {
        var metrics = samplesInfo[sampleId];
        var coverageOutputPath =
            SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(outputFolder, sampleId.ToString());
        CanvasSegment.WriteCoveragePlotData(mergedVariantCalledSegments[sampleId], metrics.MeanCoverage,
            metrics.Ploidy, coverageOutputPath, referenceFolder);
    }

    // The de novo quality threshold is only supplied to the writers when a full pedigree is available
    bool isPedigreeInfoSupplied = pedigreeInfo != null && pedigreeInfo.HasFullPedigree();
    int? denovoQualityThreshold = null;
    if (isPedigreeInfoSupplied)
    {
        denovoQualityThreshold = (int?)_deNovoQualityFilterThreshold;
    }

    var ploidies = samplesInfo.Select(info => info.Value.Ploidy).ToList();
    var diploidCoverage = samplesInfo.Select(info => info.Value.MeanCoverage).ToList();
    var names = samplesInfo.SampleIds.Select(id => id.ToString()).ToList();
    CanvasSegmentWriter.WriteMultiSampleSegments(outVcfFile.FullName, mergedVariantCalledSegments, diploidCoverage,
        referenceFolder, names, null, ploidies, _qualityFilterThreshold, denovoQualityThreshold,
        CanvasFilter.SegmentSizeCutoff, isPedigreeInfoSupplied);

    // Per-sample outputs: VCF, coverage bigWig, copy number bedGraph and partition bedGraph
    foreach (var sampleId in samplesInfo.SampleIds)
    {
        string sampleName = sampleId.ToString();

        var outputVcfPath = SingleSampleCallset.GetVcfOutput(outputFolder, sampleName);
        var sampleMetrics = samplesInfo[sampleId];
        var segments = mergedVariantCalledSegments[sampleId];
        CanvasSegmentWriter.WriteSegments(outputVcfPath.FullName, segments, sampleMetrics.MeanCoverage,
            referenceFolder, sampleName, null, sampleMetrics.Ploidy, _qualityFilterThreshold,
            isPedigreeInfoSupplied, denovoQualityThreshold, null);

        var visualizationTemp = outputFolder.CreateSubdirectory($"VisualizationTemp{sampleId}");
        var normalizationFactor = NormalizationCalculator.ComputeNormalizationFactor(segments);
        var bigWig = _coverageBigWigWriter.Write(segments, visualizationTemp, normalizationFactor);
        // The writer may return null, in which case there is nothing to move
        bigWig?.MoveTo(SingleSampleCallset.GetCoverageBigWig(outputFolder, sampleName));

        var copyNumberBedGraph = SingleSampleCallset.GetCopyNumberBedGraph(outputFolder, sampleName);
        _copyNumberBedGraphWriter.Write(segments, sampleMetrics.Ploidy, copyNumberBedGraph);

        var originalSegments = sampleSegments[sampleId];
        _partitionCoverageBedGraphWriter.Write(originalSegments.AllSegments,
            SingleSampleCallset.GetPartitionBedGraph(outputFolder, sampleName), normalizationFactor,
            partitionBedgraphHeader);
    }

    return 0;
}
/// <summary>
/// Determines copy number genotypes for the samples listed in <c>pedigreeInfo.OtherIds</c> by restricting the
/// segments and likelihoods to those samples and calling <c>GetCopyNumbersNoPedigreeInfo</c>.
/// </summary>
/// <param name="canvasSegments">Segment of each sample.</param>
/// <param name="pedigreeInfo">Pedigree description; its <c>OtherIds</c> select the samples to process.</param>
/// <param name="singleSampleCopyNumberLogLikelihoods">Per-sample genotype likelihoods.</param>
/// <returns>The copy number genotypes returned by <c>GetCopyNumbersNoPedigreeInfo</c> for the selected samples.</returns>
public static SampleMap<Genotype> GetNonPedigreeCopyNumbers(ISampleMap<CanvasSegment> canvasSegments,
    PedigreeInfo pedigreeInfo, ISampleMap<Dictionary<Genotype, double>> singleSampleCopyNumberLogLikelihoods)
{
    bool IsNonPedigreeMember(SampleId candidate)
    {
        return pedigreeInfo.OtherIds.Contains(candidate);
    }

    // Only the copy numbers are needed; the second tuple element is discarded
    var (copyNumbersOfOthers, _) = GetCopyNumbersNoPedigreeInfo(
        canvasSegments.WhereSampleIds(IsNonPedigreeMember),
        singleSampleCopyNumberLogLikelihoods.WhereSampleIds(IsNonPedigreeMember));
    return copyNumbersOfOthers;
}
/// <summary>
/// Estimate joint likelihood and most likely CN assignment within a pedigree using total CN Genotype likelihoods and transition matrix
/// </summary>
/// <param name="pedigreeInfo">
/// Pedigree description (parent ids, offspring ids, phased offspring genotype combinations, transition matrix).
/// When it is null or does not describe a full pedigree, an empty assignment and empty joint likelihoods are returned.
/// </param>
/// <param name="copyNumbersLikelihoods">
/// Per-sample likelihoods keyed by total copy number genotype. Only the highest-likelihood entries of each
/// sample are considered (3 when there are two or more offspring, otherwise <c>MaximumCopyNumber</c>).
/// </param>
/// <returns>
/// The genotype assignment with the highest joint likelihood together with the joint likelihoods accumulated
/// over all evaluated combinations.
/// </returns>
/// <exception cref="IlluminaException">No combination in a full pedigree produced a maximal likelihood.</exception>
private (ISampleMap<Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetPedigreeCopyNumbers(
    PedigreeInfo pedigreeInfo, ISampleMap<Dictionary<Genotype, double>> copyNumbersLikelihoods)
{
    // NOTE(review): the cap of 3 for multi-offspring pedigrees presumably bounds the number of enumerated combinations - confirm
    int nHighestLikelihoodGenotypes = pedigreeInfo != null && pedigreeInfo.OffspringIds.Count >= 2
        ? 3
        : _callerParameters.MaximumCopyNumber;
    copyNumbersLikelihoods = copyNumbersLikelihoods.SelectValues(l =>
        l.OrderByDescending(kvp => kvp.Value).Take(nHighestLikelihoodGenotypes).ToDictionary());

    var sampleCopyNumbersGenotypes = new SampleMap<Genotype>();
    var jointLikelihood = new JointLikelihoods();

    // A missing pedigree is handled like an incomplete one (the null check above already tolerates it)
    if (pedigreeInfo == null || !pedigreeInfo.HasFullPedigree())
    {
        return (sampleCopyNumbersGenotypes, jointLikelihood);
    }

    var parent1Id = pedigreeInfo.ParentsIds.First();
    var parent2Id = pedigreeInfo.ParentsIds.Last();
    var offspringIds = pedigreeInfo.OffspringIds;
    int offspringCount = offspringIds.Count;
    // Total copy numbers of offspring are capped at MaximumCopyNumber - 1
    int highestTotalCopyNumber = _callerParameters.MaximumCopyNumber - 1;

    // parent 1 total CNs and likelihoods
    foreach (var copyNumberParent1 in copyNumbersLikelihoods[parent1Id])
    {
        // parent 2 total CNs and likelihoods
        foreach (var copyNumberParent2 in copyNumbersLikelihoods[parent2Id])
        {
            // for offspring in addition to querying likelihoods using total CNs, iterate over all possible genotype
            // combination (CopyNumberA/B) for a given CN and estimate likely transition probabilities using TransitionMatrix
            foreach (var offspringGtStates in pedigreeInfo.OffspringPhasedGenotypes)
            {
                // Resolve every offspring's total CN genotype and its likelihood once; skip the combination
                // as soon as one offspring has no likelihood for its total CN.
                var totalCopyNumberGenotypes = new List<Genotype>(offspringCount);
                var offspringLikelihoods = new List<double>(offspringCount);
                bool allTotalCopyNumbersAvailable = true;
                for (var counter = 0; counter < offspringCount; counter++)
                {
                    var phasedGenotype = offspringGtStates[counter].PhasedGenotype;
                    var copyNumberGenotypeChild = Genotype.Create(Math.Min(
                        phasedGenotype.CopyNumberA + phasedGenotype.CopyNumberB, highestTotalCopyNumber));
                    if (!copyNumbersLikelihoods[offspringIds[counter]]
                        .TryGetValue(copyNumberGenotypeChild, out double childLikelihood))
                    {
                        allTotalCopyNumbersAvailable = false;
                        break;
                    }

                    totalCopyNumberGenotypes.Add(copyNumberGenotypeChild);
                    offspringLikelihoods.Add(childLikelihood);
                }

                if (!allTotalCopyNumbersAvailable)
                {
                    // unavailable total CN
                    continue;
                }

                // For a given combination of offspring copy numbers, only the genotypes that result in the maximum
                // likelihood contribute to the final result.
                double currentLikelihood = copyNumberParent1.Value * copyNumberParent2.Value;
                for (var counter = 0; counter < offspringCount; counter++)
                {
                    var phasedGenotype = offspringGtStates[counter].PhasedGenotype;
                    currentLikelihood *=
                        pedigreeInfo.TransitionMatrix[copyNumberParent1.Key.TotalCopyNumber][phasedGenotype.CopyNumberA] *
                        pedigreeInfo.TransitionMatrix[copyNumberParent2.Key.TotalCopyNumber][phasedGenotype.CopyNumberB] *
                        offspringLikelihoods[counter];
                }

                // Numerically invalid products are treated as zero likelihood
                currentLikelihood = Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood)
                    ? 0
                    : currentLikelihood;

                var genotypesInPedigree = new SampleMap<Genotype>
                {
                    { parent1Id, copyNumberParent1.Key },
                    { parent2Id, copyNumberParent2.Key }
                };
                for (var counter = 0; counter < offspringCount; counter++)
                {
                    genotypesInPedigree.Add(offspringIds[counter], totalCopyNumberGenotypes[counter]);
                }

                genotypesInPedigree = genotypesInPedigree.OrderBy(pedigreeInfo.AllSampleIds);
                jointLikelihood.AddJointLikelihood(genotypesInPedigree, currentLikelihood);

                double currentLogLikelihood = Math.Log(currentLikelihood);
                if (currentLogLikelihood > jointLikelihood.MaximalLogLikelihood)
                {
                    jointLikelihood.MaximalLogLikelihood = currentLogLikelihood;
                    sampleCopyNumbersGenotypes = genotypesInPedigree;
                }
            }
        }
    }

    if (sampleCopyNumbersGenotypes.Empty())
    {
        throw new IlluminaException("Maximal likelihood was not found");
    }

    return (sampleCopyNumbersGenotypes, jointLikelihood);
}
/// <summary>
/// Searches all combinations of parental phased genotypes compatible with the parents' called copy numbers,
/// scores each combination by the summed log-likelihood of the parents and of the best-fitting genotype of
/// every offspring, and calls <c>AssignMcc</c> for each pedigree member whenever a combination improves on the
/// best score seen so far (the original documentation states this updates MajorChromosomeCount).
/// </summary>
/// <param name="canvasSegments">Segment of each sample; passed to <c>AssignMcc</c> for updating.</param>
/// <param name="model">Per-sample copy number model.</param>
/// <param name="pedigreeInfo">Pedigree description providing parent and offspring ids.</param>
private void AssignMccWithPedigreeInfo(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<ICopyNumberModel> model,
    PedigreeInfo pedigreeInfo)
{
    var firstParentId = pedigreeInfo.ParentsIds.First();
    var secondParentId = pedigreeInfo.ParentsIds.Last();
    var firstParentSegment = canvasSegments[firstParentId];
    var secondParentSegment = canvasSegments[secondParentId];

    double maximalLogLikelihood = Double.NegativeInfinity;
    int parent1CopyNumber = firstParentSegment.CopyNumber;
    int parent2CopyNumber = secondParentSegment.CopyNumber;

    foreach (var parent1GtStates in _genotypes[parent1CopyNumber])
    {
        foreach (var parent2GtStates in _genotypes[parent2CopyNumber])
        {
            var bestChildGtStates = new List<PhasedGenotype>();
            double currentLogLikelihood = 0;

            foreach (SampleId child in pedigreeInfo.OffspringIds)
            {
                var offspringSegment = canvasSegments[child];
                int childCopyNumber = offspringSegment.CopyNumber;
                // A segment without a de novo quality score is treated as an inherited CNV
                bool isInheritedCnv = !offspringSegment.DqScore.HasValue;

                PhasedGenotype bestGtState = null;
                double childLogLikelihood = GetProbandLogLikelihood(model[child], childCopyNumber, parent1GtStates,
                    parent2GtStates, isInheritedCnv, offspringSegment, Double.NegativeInfinity, ref bestGtState);

                bestChildGtStates.Add(bestGtState);
                currentLogLikelihood += childLogLikelihood;
            }

            currentLogLikelihood +=
                GetCurrentGtLogLikelihood(model[firstParentId], firstParentSegment, parent1GtStates) +
                GetCurrentGtLogLikelihood(model[secondParentId], secondParentSegment, parent2GtStates);

            // Numerically invalid totals can never become the best combination
            if (Double.IsNaN(currentLogLikelihood) || Double.IsInfinity(currentLogLikelihood))
            {
                currentLogLikelihood = Double.NegativeInfinity;
            }

            if (!(currentLogLikelihood > maximalLogLikelihood))
            {
                continue;
            }

            // New best combination: record it and re-assign every pedigree member
            maximalLogLikelihood = currentLogLikelihood;
            AssignMcc(firstParentSegment, model[firstParentId], parent1GtStates, parent1CopyNumber);
            AssignMcc(secondParentSegment, model[secondParentId], parent2GtStates, parent2CopyNumber);

            for (int childIndex = 0; childIndex < pedigreeInfo.OffspringIds.Count; childIndex++)
            {
                var childId = pedigreeInfo.OffspringIds[childIndex];
                var bestChildGtState = bestChildGtStates[childIndex];
                // Offspring for which no best genotype was found are left untouched
                if (bestChildGtState != null)
                {
                    var assignedSegment = canvasSegments[childId];
                    AssignMcc(assignedSegment, model[childId], bestChildGtState, assignedSegment.CopyNumber);
                }
            }
        }
    }
}