示例#1
0
        /// <summary>
        /// Chooses between the two alternative segmentations (SetA / SetB) of an overlapping region and
        /// activates the chosen one on every sample.
        /// </summary>
        /// <remarks>
        /// When the first sample has no SetA, SetB is chosen; when it has no SetB, SetA is chosen. Otherwise both
        /// hypotheses are scored with GetSegmentSetLogLikelihood and SetA wins only if its score is strictly
        /// greater than the score of SetB.
        /// </remarks>
        /// <param name="canvasSegmentsSet">Per-sample regions holding the two alternative segment sets.</param>
        /// <param name="pedigreeMembersInfo">Per-sample metrics forwarded to the likelihood calculation.</param>
        /// <param name="model">Per-sample copy number models forwarded to the likelihood calculation.</param>
        private void GetHighestLogLikelihoodSegmentsSet(ISampleMap <OverlappingSegmentsRegion> canvasSegmentsSet, ISampleMap <SampleMetrics> pedigreeMembersInfo,
                                                        ISampleMap <ICopyNumberModel> model)
        {
            SegmentsSet ChooseSegmentsSet()
            {
                if (canvasSegmentsSet.Values.First().SetA == null)
                {
                    return SegmentsSet.SetB;
                }

                if (canvasSegmentsSet.Values.First().SetB == null)
                {
                    return SegmentsSet.SetA;
                }

                // SetA is scored before SetB; each scoring call switches the active set on all samples.
                double setALogLikelihood = GetSegmentSetLogLikelihood(canvasSegmentsSet, pedigreeMembersInfo, model, SegmentsSet.SetA);
                double setBLogLikelihood = GetSegmentSetLogLikelihood(canvasSegmentsSet, pedigreeMembersInfo, model, SegmentsSet.SetB);

                return setALogLikelihood > setBLogLikelihood ? SegmentsSet.SetA : SegmentsSet.SetB;
            }

            var chosenSet = ChooseSegmentsSet();

            foreach (var id in canvasSegmentsSet.SampleIds)
            {
                canvasSegmentsSet[id].SetSet(chosenSet);
            }
        }
        /// <summary>
        /// Computes, for every sample, the likelihood of each total copy number genotype
        /// (0 .. _maximumCopyNumber - 1) given the coverage of that sample's segment.
        /// </summary>
        /// <remarks>
        /// The coverage passed to the model is the segment's TruncatedMedianCount (with 5 bins removed), capped at
        /// three times the sample's mean coverage. Likelihoods that are NaN or infinite are stored as 0.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample.</param>
        /// <param name="samplesInfo">Per-sample metrics; only MeanCoverage is read here.</param>
        /// <param name="copyNumberModel">Per-sample model used to evaluate the likelihoods.</param>
        /// <returns>For every sample, a map from genotype to its likelihood.</returns>
        public ISampleMap <Dictionary <Genotype, double> > GetCopyNumbersLikelihoods(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                                                                     ISampleMap <ICopyNumberModel> copyNumberModel)
        {
            const int    bins2Remove             = 5;
            const double maxCoverageMultiplier   = 3.0;
            var          genotypes               = Enumerable.Range(0, _maximumCopyNumber).Select(Genotype.Create).ToList();
            var          singleSampleLikelihoods = new SampleMap <Dictionary <Genotype, double> >();

            foreach (var sampleId in canvasSegments.SampleIds)
            {
                // The model and the capped coverage depend only on the sample, so they are
                // resolved once here instead of once per genotype.
                var    sampleModel = copyNumberModel[sampleId];
                double coverage    = Math.Min(canvasSegments[sampleId].TruncatedMedianCount(bins2Remove),
                                              samplesInfo[sampleId].MeanCoverage * maxCoverageMultiplier);
                var    density     = new Dictionary <Genotype, double>();

                foreach (var genotypeCopyNumber in genotypes)
                {
                    double currentLikelihood = sampleModel.GetTotalCopyNumberLikelihoods(coverage, genotypeCopyNumber);
                    if (Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood))
                    {
                        currentLikelihood = 0;
                    }
                    density[genotypeCopyNumber] = currentLikelihood;
                }
                singleSampleLikelihoods.Add(sampleId, density);
            }
            return(singleSampleLikelihoods);
        }
示例#3
0
        /// <summary>
        /// Scores one of the two alternative segmentation hypotheses (SetA or SetB) of an overlapping region.
        /// The score is the sum, over all segments of the chosen set, of the maximal log likelihood obtained
        /// when calling copy numbers without pedigree information.
        /// </summary>
        /// <remarks>
        /// Side effect: the requested set is made the active set on every sample and is left active on return.
        /// The number of segments is taken from the first sample; every sample is indexed with the same positions.
        /// </remarks>
        /// <param name="canvasSegmentsSet">Per-sample regions holding the alternative segment sets.</param>
        /// <param name="samplesInfo">Per-sample metrics forwarded to the likelihood calculator.</param>
        /// <param name="copyNumberModel">Per-sample copy number models forwarded to the likelihood calculator.</param>
        /// <param name="segmentsSet">The hypothesis to score.</param>
        /// <returns>The summed maximal log likelihood of the chosen hypothesis.</returns>
        private double GetSegmentSetLogLikelihood(ISampleMap <OverlappingSegmentsRegion> canvasSegmentsSet, ISampleMap <SampleMetrics> samplesInfo,
                                                  ISampleMap <ICopyNumberModel> copyNumberModel, SegmentsSet segmentsSet)
        {
            // Collects the segment at one position of the active set from every sample.
            ISampleMap<CanvasSegment> CollectSegmentsAt(int position)
            {
                var perSample = new SampleMap<CanvasSegment>();
                foreach (var memberId in canvasSegmentsSet.SampleIds)
                {
                    perSample.Add(memberId, canvasSegmentsSet[memberId].GetSet()[position]);
                }
                return perSample;
            }

            foreach (var sampleId in canvasSegmentsSet.SampleIds)
            {
                canvasSegmentsSet[sampleId].SetSet(segmentsSet);
            }

            int segmentCount = canvasSegmentsSet.First().Value.GetSet().Count;

            // All positions are gathered before any likelihood is computed.
            var segmentsByPosition = new List<ISampleMap<CanvasSegment>>();
            for (int position = 0; position < segmentCount; position++)
            {
                segmentsByPosition.Add(CollectSegmentsAt(position));
            }

            double totalLogLikelihood = 0;
            foreach (var segmentsAtPosition in segmentsByPosition)
            {
                var likelihoodsPerSample = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(segmentsAtPosition, samplesInfo, copyNumberModel, _callerParameters.NumberOfTrimmedBins);
                var (_, jointLikelihoods) = GetCopyNumbersNoPedigreeInfo(segmentsAtPosition, likelihoodsPerSample);
                totalLogLikelihood += jointLikelihoods.MaximalLogLikelihood;
            }

            return totalLogLikelihood;
        }
示例#4
0
        /// <summary>
        /// Sets MajorChromosomeCount (MCC) on each sample's segment from its already assigned CopyNumber,
        /// without using pedigree information.
        /// </summary>
        /// <remarks>
        /// Copy number 2 gets no MCC (null); copy numbers below 2 get MCC equal to the copy number. For higher copy
        /// numbers the genotype selected by GetGtLogLikelihoodScore determines MCC (the larger of its two allele
        /// copy numbers) and the returned score is stored as MajorChromosomeCountScore. If no genotype is
        /// selected the segment is left unchanged.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample; updated in place.</param>
        /// <param name="model">Per-sample copy number models.</param>
        /// <param name="genotypes">Candidate phased genotypes keyed by total copy number.</param>
        private void AssignMccNoPedigreeInfo(ISampleMap <CanvasSegment> canvasSegments,
                                             ISampleMap <ICopyNumberModel> model, Dictionary <int, List <PhasedGenotype> > genotypes)
        {
            const int diploidCopyNumber = 2;

            foreach (var sampleId in canvasSegments.SampleIds)
            {
                var segment    = canvasSegments[sampleId];
                int copyNumber = segment.CopyNumber;

                // variant caller does not attempt to call LOH, for DELs CN=MCC
                if (copyNumber == diploidCopyNumber)
                {
                    segment.MajorChromosomeCount = null;
                    continue;
                }

                if (copyNumber < diploidCopyNumber)
                {
                    segment.MajorChromosomeCount = copyNumber;
                    continue;
                }

                var    candidates        = genotypes[copyNumber];
                int?   bestGenotypeIndex = null;
                double score             = GetGtLogLikelihoodScore(segment.Balleles, candidates, ref bestGenotypeIndex, model[sampleId]);

                if (!bestGenotypeIndex.HasValue)
                {
                    continue;
                }

                var bestGenotype = candidates[bestGenotypeIndex.Value];
                segment.MajorChromosomeCount      = Math.Max(bestGenotype.CopyNumberA, bestGenotype.CopyNumberB);
                segment.MajorChromosomeCountScore = score;
            }
        }
示例#5
0
        /// <summary>
        /// Merges the segments of every sample using copy numbers and q-scores pooled across samples.
        /// </summary>
        /// <remarks>
        /// For each segment position the copy numbers of all samples are collected and the q-scores of all
        /// samples are averaged; both lists are then handed to CanvasSegment.MergeSegments for each sample.
        /// </remarks>
        /// <param name="segments">Per-sample segment lists; all lists must have the same length.</param>
        /// <param name="minimumCallSize">Forwarded to CanvasSegment.MergeSegments.</param>
        /// <param name="qScoreThreshold">Forwarded to CanvasSegment.MergeSegments.</param>
        /// <returns>A new sample map holding the merged segment list of every sample.</returns>
        /// <exception cref="ArgumentException">The samples do not all have the same number of segments.</exception>
        private static ISampleMap <List <CanvasSegment> > MergeSegments(ISampleMap <List <CanvasSegment> > segments, int minimumCallSize, int qScoreThreshold)
        {
            int nSegments = segments.First().Value.Count;

            // Segments are addressed by position across samples below, so the lists must line up.
            if (segments.Any(sampleSegments => sampleSegments.Value.Count != nSegments))
            {
                throw new ArgumentException("All samples must have the same number of segments.", nameof(segments));
            }

            var copyNumbers = new List <List <int> >(nSegments);
            var qscores     = new List <double>(nSegments);

            foreach (int segmentIndex in Enumerable.Range(0, nSegments))
            {
                copyNumbers.Add(segments.Select(s => s.Value[segmentIndex].CopyNumber).ToList());
                qscores.Add(segments.Select(s => s.Value[segmentIndex].QScore).Average());
            }

            var mergedSegments = new SampleMap <List <CanvasSegment> >();

            foreach (var sampleSegments in segments)
            {
                // NOTE(review): 10000 is passed positionally; its meaning is defined by
                // CanvasSegment.MergeSegments, which is not visible here.
                var mergedSegmentsThisSample = CanvasSegment.MergeSegments(sampleSegments.Value.ToList(),
                                                                           minimumCallSize, 10000, copyNumbers, qscores, qScoreThreshold);
                mergedSegments.Add(sampleSegments.Key, mergedSegmentsThisSample);
            }
            return(mergedSegments);
        }
示例#6
0
 /// <summary>
 /// Writes a multi-sample VCF to <paramref name="outVcfPath"/>: first the header, then the variants.
 /// </summary>
 /// <remarks>
 /// The header is written from the first sample's segments and the average of <paramref name="diploidCoverage"/>.
 /// <paramref name="isPedigreeInfoSupplied"/> is not used by this method.
 /// </remarks>
 public static void WriteMultiSampleSegments(string outVcfPath, ISampleMap <List <CanvasSegment> > segments, List <double> diploidCoverage,
                                             string wholeGenomeFastaDirectory, List <string> sampleNames, List <string> extraHeaders, List <PloidyInfo> ploidies, int qualityThreshold, int?denovoQualityThreshold, int?sizeThreshold, bool isPedigreeInfoSupplied = true)
 {
     using (var vcfWriter = new BgzipOrStreamWriter(outVcfPath))
     {
         // Inputs are read only after the writer has been opened, as before.
         var firstSampleSegments = segments.Values.First();
         var meanDiploidCoverage = diploidCoverage.Average();

         var genome = WriteVcfHeader(firstSampleSegments, meanDiploidCoverage, wholeGenomeFastaDirectory, sampleNames,
                                     extraHeaders, vcfWriter, qualityThreshold, denovoQualityThreshold, sizeThreshold);

         var segmentsAcrossSamples = segments.Zip();
         WriteVariants(segmentsAcrossSamples, ploidies, genome, vcfWriter, denovoQualityThreshold);
     }
 }
示例#7
0
        /// <summary>
        /// Calls copy numbers for the samples listed in <c>pedigreeInfo.OtherIds</c>, without pedigree information.
        /// </summary>
        /// <param name="canvasSegments">One segment per sample (all samples).</param>
        /// <param name="pedigreeInfo">Supplies the ids of the samples to call.</param>
        /// <param name="singleSampleCopyNumberLogLikelihoods">Per-sample genotype likelihoods (all samples).</param>
        /// <returns>The copy number genotypes returned by GetCopyNumbersNoPedigreeInfo for the selected samples.</returns>
        public static SampleMap <Genotype> GetNonPedigreeCopyNumbers(ISampleMap <CanvasSegment> canvasSegments, PedigreeInfo pedigreeInfo,
                                                                     ISampleMap <Dictionary <Genotype, double> > singleSampleCopyNumberLogLikelihoods)
        {
            bool IsNonPedigreeMember(SampleId candidateId)
            {
                return pedigreeInfo.OtherIds.Contains(candidateId);
            }

            var otherSegments    = canvasSegments.WhereSampleIds(IsNonPedigreeMember);
            var otherLikelihoods = singleSampleCopyNumberLogLikelihoods.WhereSampleIds(IsNonPedigreeMember);

            // Only the copy numbers are needed; the second tuple element is discarded.
            var (otherCopyNumbers, _) = GetCopyNumbersNoPedigreeInfo(otherSegments, otherLikelihoods);
            return otherCopyNumbers;
        }
示例#8
0
        /// <summary>
        /// Decides whether b-allele information is sufficient in every sample's segment.
        /// </summary>
        /// <remarks>
        /// For each segment, the entries of <c>Balleles.TotalCoverage</c> that are at least
        /// <paramref name="minAlleleCountsThreshold"/> are counted (0 when the b-alleles or their coverage are null).
        /// The result is true only when that count reaches <paramref name="minAlleleNumberInSegment"/> in all segments.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample.</param>
        /// <param name="minAlleleCountsThreshold">Minimum total coverage for an entry to be counted.</param>
        /// <param name="minAlleleNumberInSegment">Minimum number of counted entries required per segment.</param>
        /// <returns>True when every segment has enough sufficiently covered entries.</returns>
        public static bool UseAlleleCountsInformation(ISampleMap <CanvasSegment> canvasSegments,
                                                      int minAlleleCountsThreshold, int minAlleleNumberInSegment)
        {
            var wellCoveredCounts = new List<int>();

            foreach (var segment in canvasSegments.Values)
            {
                var totalCoverage = segment.Balleles?.TotalCoverage;
                // allele read coverage check
                int wellCovered = totalCoverage?.Count(coverage => coverage >= minAlleleCountsThreshold) ?? 0;
                wellCoveredCounts.Add(wellCovered);
            }

            // number of SNVs in a segment check
            return wellCoveredCounts.All(count => count >= minAlleleNumberInSegment);
        }
示例#9
0
 /// <summary>
 /// Records the likelihood of a joint genotype configuration, keeping only the largest value seen per
 /// configuration, and keeps <c>TotalMarginalLikelihood</c> in step with the stored values.
 /// </summary>
 /// <remarks>
 /// A new configuration adds its likelihood to the total. An existing configuration is updated only when the
 /// new likelihood is strictly greater than the stored one; the total then grows by the difference.
 /// </remarks>
 /// <param name="samplesGenotypes">The genotype configuration used as the key.</param>
 /// <param name="likelihood">The likelihood of that configuration.</param>
 public void AddJointLikelihood(ISampleMap <Genotype> samplesGenotypes, double likelihood)
 {
     // Single lookup instead of ContainsKey followed by indexer reads.
     if (!_jointLikelihoods.TryGetValue(samplesGenotypes, out var storedLikelihood))
     {
         TotalMarginalLikelihood             = TotalMarginalLikelihood + likelihood;
         _jointLikelihoods[samplesGenotypes] = likelihood;
         return;
     }

     if (storedLikelihood < likelihood)
     {
         TotalMarginalLikelihood             = TotalMarginalLikelihood + (likelihood - storedLikelihood);
         _jointLikelihoods[samplesGenotypes] = likelihood;
     }
 }
示例#10
0
        /// <summary>
        /// Assigns MajorChromosomeCount to parents and offspring by searching all pairs of parental genotype
        /// states (for the parents' called copy numbers) for the pair with the highest total log likelihood.
        /// </summary>
        /// <remarks>
        /// For each parental pair, every offspring contributes the log likelihood returned by GetProbandLogLikelihood,
        /// and both parents contribute GetCurrentGtLogLikelihood. A NaN or infinite total is treated as negative
        /// infinity. Whenever a pair strictly improves on the best total so far, AssignMcc is applied to both
        /// parents and to every offspring for which a best genotype state was returned.
        /// The first and last entries of <c>pedigreeInfo.ParentsIds</c> are used as the two parents.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample; updated through AssignMcc.</param>
        /// <param name="model">Per-sample copy number models.</param>
        /// <param name="pedigreeInfo">Supplies parent and offspring ids.</param>
        private void AssignMccWithPedigreeInfo(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <ICopyNumberModel> model, PedigreeInfo pedigreeInfo)
        {
            var firstParentId       = pedigreeInfo.ParentsIds.First();
            var secondParentId      = pedigreeInfo.ParentsIds.Last();
            var firstParentSegment  = canvasSegments[firstParentId];
            var secondParentSegment = canvasSegments[secondParentId];
            int parent1CopyNumber   = firstParentSegment.CopyNumber;
            int parent2CopyNumber   = secondParentSegment.CopyNumber;
            var offspringIds        = pedigreeInfo.OffspringIds;

            double bestTotalLogLikelihood = Double.NegativeInfinity;

            foreach (var parent1GtStates in _genotypes[parent1CopyNumber])
            {
                foreach (var parent2GtStates in _genotypes[parent2CopyNumber])
                {
                    var    bestStatePerChild  = new List <PhasedGenotype>();
                    double totalLogLikelihood = 0;

                    foreach (SampleId child in offspringIds)
                    {
                        var  childSegment    = canvasSegments[child];
                        int  childCopyNumber = childSegment.CopyNumber;
                        // A CNV without a de novo quality score is treated as inherited.
                        bool isInheritedCnv  = !childSegment.DqScore.HasValue;

                        PhasedGenotype bestChildState = null;
                        double childLogLikelihood = GetProbandLogLikelihood(model[child], childCopyNumber,
                                                                            parent1GtStates, parent2GtStates, isInheritedCnv, canvasSegments[child], Double.NegativeInfinity, ref bestChildState);
                        bestStatePerChild.Add(bestChildState);
                        totalLogLikelihood += childLogLikelihood;
                    }

                    double parent1LogLikelihood = GetCurrentGtLogLikelihood(model[firstParentId], firstParentSegment, parent1GtStates);
                    double parent2LogLikelihood = GetCurrentGtLogLikelihood(model[secondParentId], secondParentSegment, parent2GtStates);
                    totalLogLikelihood += parent1LogLikelihood + parent2LogLikelihood;

                    if (Double.IsNaN(totalLogLikelihood) || Double.IsInfinity(totalLogLikelihood))
                    {
                        totalLogLikelihood = Double.NegativeInfinity;
                    }

                    if (!(totalLogLikelihood > bestTotalLogLikelihood))
                    {
                        continue;
                    }

                    bestTotalLogLikelihood = totalLogLikelihood;
                    AssignMcc(firstParentSegment, model[firstParentId], parent1GtStates, parent1CopyNumber);
                    AssignMcc(secondParentSegment, model[secondParentId], parent2GtStates, parent2CopyNumber);

                    for (int childIndex = 0; childIndex < offspringIds.Count; childIndex++)
                    {
                        var bestChildState = bestStatePerChild[childIndex];
                        if (bestChildState != null)
                        {
                            var childId      = offspringIds[childIndex];
                            var childSegment = canvasSegments[childId];
                            AssignMcc(childSegment, model[childId], bestChildState, childSegment.CopyNumber);
                        }
                    }
                }
            }
        }
示例#11
0
        /// <summary>
        /// Adds the CNV size filter to every segment, in every sample, whose length is below
        /// <c>CanvasFilter.SegmentSizeCutoff</c>.
        /// </summary>
        /// <param name="segments">Per-sample segment lists; the segments' Filter is updated in place.</param>
        private void FilterExcessivelyShortSegments(ISampleMap <List <CanvasSegment> > segments)
        {
            string sizeFilter = CanvasFilter.GetCnvSizeFilter(CanvasFilter.SegmentSizeCutoff);

            foreach (var segment in segments.Values.SelectMany(sampleSegments => sampleSegments))
            {
                if (segment.Length < CanvasFilter.SegmentSizeCutoff)
                {
                    segment.Filter = segment.Filter.AddFilter(sizeFilter);
                }
            }
        }
示例#12
0
        /// <summary>
        /// Determines whether the proband's CNV is shared with its parents, using phased genotypes when available.
        /// </summary>
        /// <remarks>
        /// When the proband has no phased genotype, the decision is delegated to the overload that uses total copy
        /// numbers only. Otherwise the CNV is shared when the proband's allele A is shared with one parent and its
        /// allele B with the other parent, in either assignment. The first and last entries of
        /// <paramref name="parentIDs"/> are used as the two parents.
        /// </remarks>
        public static bool IsSharedCnv(ISampleMap <Genotype> copyNumberGenotypes, ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo, List <SampleId> parentIDs,
                                       SampleId probandId, int maximumCopyNumber)
        {
            var probandGenotype      = copyNumberGenotypes[probandId];
            var firstParentGenotype  = copyNumberGenotypes[parentIDs.First()];
            var secondParentGenotype = copyNumberGenotypes[parentIDs.Last()];

            var probandPhased = probandGenotype.PhasedGenotype;
            if (probandPhased == null)
            {
                return IsSharedCnv(canvasSegments, samplesInfo, parentIDs, probandId, maximumCopyNumber);
            }

            var firstParentPhased  = firstParentGenotype.PhasedGenotype;
            var secondParentPhased = secondParentGenotype.PhasedGenotype;

            bool alleleAFromFirstParent = probandPhased.ContainsSharedAlleleA(firstParentPhased) &&
                                          probandPhased.ContainsSharedAlleleB(secondParentPhased);
            if (alleleAFromFirstParent)
            {
                return true;
            }

            return probandPhased.ContainsSharedAlleleA(secondParentPhased) &&
                   probandPhased.ContainsSharedAlleleB(firstParentPhased);
        }
示例#13
0
 /// <summary>
 /// Stores the called copy number and its single-sample quality score on each sample's segment, flags
 /// low-quality calls, and computes de novo quality scores when a full pedigree is available.
 /// </summary>
 /// <remarks>
 /// A segment whose QScore is below <c>_qualityFilterThreshold</c> has its Filter replaced by a filter named
 /// "q" followed by the threshold value.
 /// </remarks>
 private void EstimateQScores(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> pedigreeMembersInfo,
                              PedigreeInfo pedigreeInfo, ISampleMap <Dictionary <Genotype, double> > singleSampleLikelihoods, JointLikelihoods copyNumberLikelihoods, ISampleMap <Genotype> copyNumbers)
 {
     foreach (var sampleId in canvasSegments.SampleIds)
     {
         var segment           = canvasSegments[sampleId];
         var sampleLikelihoods = singleSampleLikelihoods[sampleId];
         var calledGenotype    = copyNumbers[sampleId];

         segment.QScore     = GetSingleSampleQualityScore(sampleLikelihoods, calledGenotype);
         segment.CopyNumber = calledGenotype.TotalCopyNumber;

         bool isLowQuality = segment.QScore < _qualityFilterThreshold;
         if (isLowQuality)
         {
             segment.Filter = CanvasFilter.Create(new[] { $"q{_qualityFilterThreshold}" });
         }
     }

     if (!pedigreeInfo.HasFullPedigree())
     {
         return;
     }

     SetDenovoQualityScores(canvasSegments, pedigreeMembersInfo, pedigreeInfo.ParentsIds, pedigreeInfo.OffspringIds, copyNumberLikelihoods);
 }
示例#14
0
        /// <summary>
        /// Builds a <c>PedigreeInfo</c> from the kinship of each sample.
        /// </summary>
        /// <remarks>
        /// A pedigree is full when there is exactly one father, one mother and one proband. For a full pedigree,
        /// parents are the father and mother, offspring are the proband and siblings, and "other" samples are those
        /// typed Other. For a partial pedigree, parents and offspring are empty and every sample is "other".
        /// </remarks>
        /// <param name="kinships">The sample type of each sample.</param>
        /// <param name="callerParameters">Supplies MaximumCopyNumber and is forwarded to GetOffspringGenotypes.</param>
        public static PedigreeInfo GetPedigreeInfo(ISampleMap <SampleType> kinships, PedigreeCallerParameters callerParameters)
        {
            int CountOf(SampleType role)
            {
                return kinships.Values.Count(kinship => kinship == role);
            }

            var allSampleIds = kinships.SampleIds.ToReadOnlyList();

            bool fullPedigree = CountOf(SampleType.Father) == 1 &&
                                CountOf(SampleType.Mother) == 1 &&
                                CountOf(SampleType.Proband) == 1;

            List<SampleId> parentsIds;
            List<SampleId> offspringIds;
            List<SampleId> otherIds;

            if (fullPedigree)
            {
                parentsIds   = kinships.WhereValues(value => value == SampleType.Father || value == SampleType.Mother).SampleIds.ToList();
                offspringIds = kinships.WhereValues(value => value == SampleType.Proband || value == SampleType.Sibling).SampleIds.ToList();
                otherIds     = kinships.WhereValues(value => value == SampleType.Other).SampleIds.ToList();
            }
            else
            {
                // do not populate parents and offspring fields for partial pedigrees
                parentsIds   = new List<SampleId>();
                offspringIds = new List<SampleId>();
                otherIds     = kinships.SampleIds.ToList();
            }

            var maximumCopyNumber = callerParameters.MaximumCopyNumber;

            var parentalPhasedGenotypes          = GeneratePhasedGenotype(maximumCopyNumber);
            var parentalTotalCopyNumberGenotypes = Enumerable.Range(0, maximumCopyNumber).Select(Genotype.Create).ToList();

            var offspringPhasedGenotypes          = GetOffspringGenotypes(callerParameters, parentalPhasedGenotypes, offspringIds);
            var offspringTotalCopyNumberGenotypes = GetOffspringGenotypes(callerParameters, parentalTotalCopyNumberGenotypes, offspringIds);

            var transitionMatrix = GetTransitionMatrix(maximumCopyNumber);

            return new PedigreeInfo(allSampleIds, offspringIds, parentsIds, otherIds, offspringPhasedGenotypes, offspringTotalCopyNumberGenotypes, transitionMatrix);
        }
示例#15
0
        /// <summary>
        /// Computes a Phred-scaled quality score for a de novo call in the proband.
        /// </summary>
        /// <remarks>
        /// Both the marginal de novo gain and de novo loss likelihoods are requested from
        /// <paramref name="jointLikelihoods"/>, using the expected ploidy of the proband and of both parents as
        /// genotypes. When the proband's copy number exceeds its ploidy the gain likelihood is used, normalised by
        /// the total marginal likelihood minus the loss likelihood; otherwise the roles are swapped. The result is
        /// -10 * log10(max(1 - ratio, 1e-6)), so it cannot exceed 60.
        /// The first and last entries of <paramref name="parentIDs"/> are used as the two parents.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample.</param>
        /// <param name="jointLikelihoods">Joint likelihoods across the pedigree.</param>
        /// <param name="samplesInfo">Per-sample metrics used to obtain the expected ploidy.</param>
        /// <param name="parentIDs">Ids of the parents.</param>
        /// <param name="probandId">Id of the proband.</param>
        /// <returns>The Phred-scaled score.</returns>
        internal static double GetConditionalDeNovoQualityScore(ISampleMap <CanvasSegment> canvasSegments, JointLikelihoods jointLikelihoods, ISampleMap <SampleMetrics> samplesInfo, List <SampleId> parentIDs, SampleId probandId)
        {
            const double q60 = 0.000001;

            var firstParentId  = parentIDs.First();
            var secondParentId = parentIDs.Last();

            var parent1Ploidy = Genotype.Create(samplesInfo[firstParentId].GetPloidy(canvasSegments[firstParentId]));
            var parent2Ploidy = Genotype.Create(samplesInfo[secondParentId].GetPloidy(canvasSegments[secondParentId]));
            int probandPloidy = samplesInfo[probandId].GetPloidy(canvasSegments[probandId]);

            var probandEntry      = new KeyValuePair<SampleId, Genotype>(probandId, Genotype.Create(probandPloidy));
            var firstParentEntry  = new KeyValuePair<SampleId, Genotype>(firstParentId, parent1Ploidy);
            var secondParentEntry = new KeyValuePair<SampleId, Genotype>(secondParentId, parent2Ploidy);

            double gainLikelihood  = jointLikelihoods.GetMarginalGainDeNovoLikelihood(probandEntry, firstParentEntry, secondParentEntry);
            double lossLikelihood  = jointLikelihoods.GetMarginalLossDeNovoLikelihood(probandEntry, firstParentEntry, secondParentEntry);
            double totalLikelihood = jointLikelihoods.TotalMarginalLikelihood;

            double denovoProbability;
            if (canvasSegments[probandId].CopyNumber > probandPloidy)
            {
                denovoProbability = 1 - gainLikelihood / (totalLikelihood - lossLikelihood);
            }
            else
            {
                denovoProbability = 1 - lossLikelihood / (totalLikelihood - gainLikelihood);
            }

            // likelihood of proband genotype != ALT given "copyNumberGenotypes" configuration in pedigree with Mendelian conflict
            return -10.0 * Math.Log10(Math.Max(denovoProbability, q60));
        }
示例#16
0
        /// <summary>
        /// Determines whether the proband's CNV is a common (shared) variant, using only total copy numbers.
        /// </summary>
        /// <remarks>
        /// Copy numbers are capped at <paramref name="maximumCopyNumber"/> - 1 and compared with each sample's
        /// expected ploidy. The CNV is NOT shared (it could be de novo) when the proband has more copies than
        /// expected while neither parent has more than expected, or when the proband has fewer copies than expected
        /// while neither parent has fewer than expected. In every other case it is shared.
        /// The first and last entries of <paramref name="parentIDs"/> are used as the two parents.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample.</param>
        /// <param name="samplesInfo">Per-sample metrics used to obtain the expected ploidy.</param>
        /// <param name="parentIDs">Ids of the parents.</param>
        /// <param name="probandId">Id of the proband.</param>
        /// <param name="maximumCopyNumber">Exclusive upper bound applied to the copy numbers.</param>
        /// <returns>True when the CNV is considered shared with the parents.</returns>
        public static bool IsSharedCnv(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo, List <SampleId> parentIDs,
                                       SampleId probandId, int maximumCopyNumber)
        {
            int highestCopyNumber = maximumCopyNumber - 1;

            var firstParentId  = parentIDs.First();
            var secondParentId = parentIDs.Last();

            var firstParentSegment  = canvasSegments[firstParentId];
            var secondParentSegment = canvasSegments[secondParentId];
            var probandSegment      = canvasSegments[probandId];

            int firstParentCopies  = Math.Min(firstParentSegment.CopyNumber, highestCopyNumber);
            int secondParentCopies = Math.Min(secondParentSegment.CopyNumber, highestCopyNumber);
            int probandCopies      = Math.Min(probandSegment.CopyNumber, highestCopyNumber);

            int firstParentPloidy  = samplesInfo[firstParentId].GetPloidy(firstParentSegment);
            int secondParentPloidy = samplesInfo[secondParentId].GetPloidy(secondParentSegment);
            int probandPloidy      = samplesInfo[probandId].GetPloidy(probandSegment);

            bool noParentHasGain = firstParentCopies <= firstParentPloidy && secondParentCopies <= secondParentPloidy;
            bool noParentHasLoss = firstParentCopies >= firstParentPloidy && secondParentCopies >= secondParentPloidy;

            bool possibleDeNovoGain = noParentHasGain && probandCopies > probandPloidy;
            bool possibleDeNovoLoss = noParentHasLoss && probandCopies < probandPloidy;

            return !(possibleDeNovoGain || possibleDeNovoLoss);
        }
        /// <summary>
        /// Computes, for every sample, the likelihood of each total copy number genotype
        /// (0 .. _maximumCopyNumber - 1) given the coverage of that sample's segment.
        /// </summary>
        /// <remarks>
        /// The coverage passed to the model is the segment's TruncatedMedianCount, capped at three times the
        /// sample's mean coverage. Likelihoods that are NaN or infinite are stored as 0.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample.</param>
        /// <param name="samplesInfo">Per-sample metrics; only MeanCoverage is read here.</param>
        /// <param name="copyNumberModel">Per-sample model used to evaluate the likelihoods.</param>
        /// <param name="numberOfTrimmedBins">Forwarded to TruncatedMedianCount.</param>
        /// <returns>For every sample, a map from genotype to its likelihood.</returns>
        /// <exception cref="ArgumentException">
        /// The rounded coverage is not below the model's coverage bound, or a genotype's total copy number
        /// exceeds the model's maximum copy number.
        /// </exception>
        public ISampleMap <Dictionary <Genotype, double> > GetCopyNumbersLikelihoods(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                                                                     ISampleMap <ICopyNumberModel> copyNumberModel, int numberOfTrimmedBins)
        {
            var          genotypes               = Enumerable.Range(0, _maximumCopyNumber).Select(Genotype.Create).ToList();
            const double maxCoverageMultiplier   = 3.0;
            var          singleSampleLikelihoods = new SampleMap <Dictionary <Genotype, double> >();

            foreach (var sampleId in canvasSegments.SampleIds)
            {
                // Everything below depends only on the sample, so it is computed once per sample
                // rather than once (or twice) per genotype.
                var    segment        = canvasSegments[sampleId];
                var    sampleModel    = copyNumberModel[sampleId];
                double truncatedDepth = segment.TruncatedMedianCount(numberOfTrimmedBins);
                double meanTimesThree = samplesInfo[sampleId].MeanCoverage * maxCoverageMultiplier;
                double cvg            = Math.Min(truncatedDepth, meanTimesThree);
                int    intcvg         = Convert.ToInt32(cvg);
                int    coverageBound  = sampleModel.GetCoverageBound();
                int    maxAllowedCN   = sampleModel.GetMaxCopyNumber();

                var density = new Dictionary <Genotype, double>();

                foreach (var genotypeCopyNumber in genotypes)
                {
                    // In case we run into out-of-range trouble again (CANV-694), print details
                    if (intcvg >= coverageBound || genotypeCopyNumber.TotalCopyNumber > maxAllowedCN)
                    {
                        throw new ArgumentException(
                                  $"Tried to look up bad depth or CN for {sampleId}: depth {intcvg} CN {genotypeCopyNumber.TotalCopyNumber}" +
                                  $" where max handled values are {coverageBound} and {maxAllowedCN} respectively;" +
                                  $" original depth was {truncatedDepth}, mean * 3 was {meanTimesThree};" +
                                  $" segment {segment.Chr}:{segment.Begin}-{segment.End}");
                    }

                    double currentLikelihood = sampleModel.GetTotalCopyNumberLikelihoods(cvg, genotypeCopyNumber);
                    if (Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood))
                    {
                        currentLikelihood = 0;
                    }
                    density[genotypeCopyNumber] = currentLikelihood;
                }
                singleSampleLikelihoods.Add(sampleId, density);
            }
            return(singleSampleLikelihoods);
        }
示例#18
0
        /// <summary>
        /// Assigns a de novo quality score (DqScore) to each offspring whose CNV passes all pre-filters.
        /// </summary>
        /// <remarks>
        /// An offspring is skipped when: it is a reference call; the CNV is shared with the parents (judged on total
        /// copy numbers only); any other offspring is not a reference call; or either parent or the offspring itself
        /// is not a PASS call. For the remaining offspring the conditional de novo quality score is doubled and then
        /// capped at <c>_callerParameters.MaxQscore</c> (an infinite score is also replaced by the cap).
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample; DqScore is updated in place.</param>
        /// <param name="samplesInfo">Per-sample metrics.</param>
        /// <param name="parentIDs">Ids of the parents.</param>
        /// <param name="offspringIDs">Ids of the offspring; each is considered as proband in turn.</param>
        /// <param name="copyNumbersLikelihoods">Joint likelihoods across the pedigree.</param>
        private void SetDenovoQualityScores(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo, List <SampleId> parentIDs, List <SampleId> offspringIDs,
                                            JointLikelihoods copyNumbersLikelihoods)
        {
            foreach (var probandId in offspringIDs)
            {
                // targeted proband is REF
                if (IsReferenceVariant(canvasSegments, samplesInfo, probandId))
                {
                    continue;
                }
                // common variant
                if (CanvasPedigreeCaller.IsSharedCnv(canvasSegments, samplesInfo, parentIDs, probandId, _callerParameters.MaximumCopyNumber))
                {
                    continue;
                }
                // at least one other offspring is ALT
                if (!offspringIDs.Except(probandId.ToEnumerable()).All(id => IsReferenceVariant(canvasSegments, samplesInfo, id)))
                {
                    continue;
                }
                // not all q-scores are above the threshold
                if (parentIDs.Concat(probandId).Any(id => !IsPassVariant(canvasSegments, id)))
                {
                    continue;
                }
                double deNovoQualityScore = CanvasPedigreeCaller.GetConditionalDeNovoQualityScore(canvasSegments, copyNumbersLikelihoods, samplesInfo, parentIDs, probandId);

                // adjustment so that denovo quality score threshold is 20 (rather than 10) to match Manta
                deNovoQualityScore *= 2;

                // Conditional (short-circuit) OR: these are boolean operands, not bit flags.
                if (Double.IsInfinity(deNovoQualityScore) || deNovoQualityScore > _callerParameters.MaxQscore)
                {
                    deNovoQualityScore = _callerParameters.MaxQscore;
                }
                canvasSegments[probandId].DqScore = deNovoQualityScore;
            }
        }
示例#19
0
        /// <summary>
        /// Calls copy numbers for one segment across all samples and assigns the related scores.
        /// </summary>
        /// <remarks>
        /// Steps: compute single-sample copy number likelihoods; call copy numbers for pedigree members and,
        /// separately, for non-pedigree samples; merge both in the order of the input samples; store copy numbers
        /// and quality scores; and, when allele counts are sufficient, assign MajorChromosomeCount for the
        /// full pedigree and/or for the non-pedigree samples.
        /// </remarks>
        /// <param name="canvasSegments">One segment per sample; updated in place.</param>
        /// <param name="samplesInfo">Per-sample metrics.</param>
        /// <param name="copyNumberModel">Per-sample copy number models.</param>
        /// <param name="pedigreeInfo">Pedigree structure of the samples.</param>
        public void CallVariant(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                ISampleMap <ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
        {
            var singleSampleLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo, copyNumberModel, _callerParameters.NumberOfTrimmedBins);

            (var pedigreeCopyNumbers, var pedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);

            var nonPedigreeCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, singleSampleLikelihoods);

            var mergedCopyNumbers = pedigreeCopyNumbers.Concat(nonPedigreeCopyNumbers).OrderBy(canvasSegments.SampleIds);

            EstimateQScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods, pedigreeLikelihoods, mergedCopyNumbers);

            // TODO: this will be integrated with GetCopyNumbers* on a model level as a part of https://jira.illumina.com/browse/CANV-404
            // Evaluated once for both MCC steps below instead of once per step.
            // NOTE(review): assumes the MCC assignment does not alter the b-allele data this check reads — confirm.
            bool useAlleleCounts = CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments, _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment);
            if (!useAlleleCounts)
            {
                return;
            }

            if (pedigreeInfo.HasFullPedigree())
            {
                AssignMccWithPedigreeInfo(canvasSegments, copyNumberModel, pedigreeInfo);
            }
            if (pedigreeInfo.HasOther())
            {
                AssignMccNoPedigreeInfo(canvasSegments.Where(segment => pedigreeInfo.OtherIds.Contains(segment.SampleId)).ToSampleMap(), copyNumberModel, _genotypes);
            }
        }
示例#20
0
        /// <summary>
        /// Calls copy numbers for the per-sample segments. Coverage likelihoods are always computed; when allele
        /// counts are usable they are joined with genotype log likelihoods, otherwise the coverage likelihoods are
        /// converted to log likelihoods on their own. The result feeds the pedigree and non-pedigree copy-number
        /// estimation, whose merged output is passed to <c>AssignCNandScores</c>.
        /// </summary>
        /// <param name="canvasSegments">One segment per sample.</param>
        /// <param name="samplesInfo">Per-sample metrics.</param>
        /// <param name="copyNumberModel">Per-sample copy-number model.</param>
        /// <param name="pedigreeInfo">Pedigree description.</param>
        public void CallVariant(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                ISampleMap <ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
        {
            var likelihoodsFromCoverage = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo, copyNumberModel);

            // number of B-alleles is read from the first sample's segment only
            int bAlleleCount = canvasSegments.Values.First().Balleles.Size();

            // allele information is used only when the segment passes the allele-count thresholds
            bool mergeAlleleInformation = CanvasPedigreeCaller.UseAlleleCountsInformation(
                canvasSegments, _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment);

            // either coverage + allele likelihoods joined by JoinLikelihoods, or coverage alone in log space
            var likelihoodsPerSample = mergeAlleleInformation
                ? JoinLikelihoods(GetGenotypeLogLikelihoods(canvasSegments, copyNumberModel, _PhasedGenotypes), likelihoodsFromCoverage, bAlleleCount)
                : ConvertToLogLikelihood(likelihoodsFromCoverage);

            // joint likelihood across pedigree samples, based on the single-sample likelihoods chosen above
            var (copyNumbersInPedigree, jointPedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, likelihoodsPerSample);
            var copyNumbersOutsidePedigree = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, likelihoodsPerSample);

            // non-pedigree samples first, then pedigree ones; final order follows canvasSegments.SampleIds
            var allCopyNumbers = copyNumbersOutsidePedigree
                                 .Concat(copyNumbersInPedigree)
                                 .OrderBy(canvasSegments.SampleIds);

            AssignCNandScores(canvasSegments, samplesInfo, pedigreeInfo, likelihoodsPerSample,
                              jointPedigreeLikelihoods, allCopyNumbers);
        }
示例#21
0
 /// <summary>
 /// Returns the copy number of the given sample's segment, capped at <c>maximumCopyNumber - 1</c>.
 /// </summary>
 /// <param name="canvasSegmentsSet">One segment per sample.</param>
 /// <param name="sampleId">Sample whose segment is inspected.</param>
 /// <param name="maximumCopyNumber">Exclusive upper bound for the returned state.</param>
 private static int GetCnState(ISampleMap <CanvasSegment> canvasSegmentsSet, SampleId sampleId, int maximumCopyNumber)
 {
     int segmentCopyNumber = canvasSegmentsSet[sampleId].CopyNumber;
     int highestState      = maximumCopyNumber - 1;

     return segmentCopyNumber < highestState ? segmentCopyNumber : highestState;
 }
示例#22
0
 /// <summary>
 /// Compares two genotype sample maps element by element, in enumeration order.
 /// </summary>
 /// <param name="x">First map; may be null.</param>
 /// <param name="y">Second map; may be null.</param>
 /// <returns>
 /// <c>true</c> when both arguments are the same instance (including both null) or enumerate equal
 /// elements in the same order; <c>false</c> when exactly one of them is null or the sequences differ.
 /// </returns>
 public bool Equals(ISampleMap <Genotype> x, ISampleMap <Genotype> y)
 {
     // same instance (or both null): equal without enumerating
     if (object.ReferenceEquals(x, y))
     {
         return true;
     }

     // SequenceEqual would throw on a null argument; a comparer should report inequality instead
     if (x is null || y is null)
     {
         return false;
     }

     return x.SequenceEqual(y);
 }
示例#23
0
        /// <summary>
        /// Runs <c>GetHighestLogLikelihoodSegmentsSet</c> on every region in parallel (input order preserved),
        /// then flattens the set selected for each region into a single list of per-sample segment maps.
        /// </summary>
        /// <param name="segmentSetsFromCommonCnvs">Regions to resolve, one per-sample map per region.</param>
        /// <param name="pedigreeMembersInfo">Per-sample metrics forwarded to the likelihood evaluation.</param>
        /// <param name="copyNumberModel">Per-sample copy-number model forwarded to the likelihood evaluation.</param>
        /// <returns>A materialised list of per-sample segment maps.</returns>
        private IEnumerable <ISampleMap <CanvasSegment> > GetHighestLikelihoodSegments(IEnumerable <ISampleMap <OverlappingSegmentsRegion> > segmentSetsFromCommonCnvs,
                                                                                       ISampleMap <SampleMetrics> pedigreeMembersInfo, ISampleMap <ICopyNumberModel> copyNumberModel)
        {
            // never use more workers than the machine has cores or the caller parameters allow
            int workerCount = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber);

            var resolvedRegions = segmentSetsFromCommonCnvs
                                  .AsParallel()
                                  .AsOrdered()
                                  .WithDegreeOfParallelism(workerCount)
                                  .Select(regionBySample =>
            {
                // updates the region in place; the same instance is passed on
                GetHighestLogLikelihoodSegmentsSet(regionBySample, pedigreeMembersInfo, copyNumberModel);
                return regionBySample;
            });

            // take the selected set of every sample and zip the per-sample sequences into per-sample maps
            var selectedSegments = resolvedRegions
                                   .SelectMany(regionBySample => regionBySample.SelectValues(region => region.GetSet().AsEnumerable()).Zip());

            return selectedSegments.ToList();
        }
示例#24
0
 /// <summary>
 /// Turns a per-sample map of region lists into a sequence of per-sample region maps by delegating to
 /// the <c>Zip</c> extension.
 /// </summary>
 /// <param name="sampleRegions">Region list of every sample.</param>
 // NOTE(review): presumably yields one map per list position across samples; confirm against Zip's implementation.
 private static IEnumerable <ISampleMap <OverlappingSegmentsRegion> > GetOverlappingSegmentsRegionSampleLists(ISampleMap <List <OverlappingSegmentsRegion> > sampleRegions)
     => sampleRegions.Zip();
示例#25
0
        /// <summary>
        /// Estimate joint likelihood and most likely CN assignment within a pedigree using total CN Genotype likelihoods and transition matrix
        /// </summary>
        /// <param name="pedigreeInfo">Pedigree description (parents, offspring, phased offspring genotypes, transition matrix); may be null.</param>
        /// <param name="copyNumbersLikelihoods">Per-sample likelihood of every total copy-number genotype.</param>
        /// <returns>
        /// The genotype assignment with the highest joint log likelihood together with the accumulated joint likelihoods.
        /// Both are returned empty when <paramref name="pedigreeInfo"/> is null or does not describe a full pedigree.
        /// </returns>
        /// <exception cref="IlluminaException">No genotype combination exceeded the initial maximal log likelihood.</exception>
        private (ISampleMap <Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetPedigreeCopyNumbers(PedigreeInfo pedigreeInfo, ISampleMap <Dictionary <Genotype, double> > copyNumbersLikelihoods)
        {
            var sampleCopyNumbersGenotypes = new SampleMap <Genotype>();
            var jointLikelihood            = new JointLikelihoods();

            // Null-safe guard: a missing pedigree is treated like an incomplete one instead of failing with a NullReferenceException.
            if (pedigreeInfo == null || !pedigreeInfo.HasFullPedigree())
            {
                return(sampleCopyNumbersGenotypes, jointLikelihood);
            }

            var offspringIds    = pedigreeInfo.OffspringIds;
            int offspringCount  = offspringIds.Count;
            int maximumCnState  = _callerParameters.MaximumCopyNumber - 1;
            var parent1Id       = pedigreeInfo.ParentsIds.First();
            var parent2Id       = pedigreeInfo.ParentsIds.Last();

            // With two or more offspring only the 3 most likely genotypes per sample are kept
            // (presumably to bound the number of combinations evaluated below).
            int nHighestLikelihoodGenotypes = offspringCount >= 2 ? 3 : _callerParameters.MaximumCopyNumber;
            ISampleMap <Dictionary <Genotype, double> > topLikelihoods =
                copyNumbersLikelihoods.SelectValues(l => l.OrderByDescending(kvp => kvp.Value).Take(nHighestLikelihoodGenotypes).ToDictionary());

            // scratch buffer for the offspring likelihoods of the combination currently being evaluated
            var childLikelihoods = new double[offspringCount];

            // parent 1 total CNs and likelihoods
            foreach (var copyNumberParent1 in topLikelihoods[parent1Id])
            {
                // parent 2 total CNs and likelihoods
                foreach (var copyNumberParent2 in topLikelihoods[parent2Id])
                {
                    // for offspring in addition to querying likelihoods using total CNs, iterate over all possible genotype combination (CopyNumberA/B) for a given
                    // CN and estimate likely transition probabilities using TransitionMatrix
                    foreach (var offspringGtStates in pedigreeInfo.OffspringPhasedGenotypes)
                    {
                        // First pass: derive each offspring's total CN genotype and look up its likelihood;
                        // stop at the first offspring whose total CN is not among the retained genotypes.
                        var totalCopyNumberGenotypes = new List <Genotype>(offspringCount);
                        for (var counter = 0; counter < offspringCount; counter++)
                        {
                            var phasedGenotype          = offspringGtStates[counter].PhasedGenotype;
                            var copyNumberGenotypeChild = Genotype.Create(Math.Min(phasedGenotype.CopyNumberA + phasedGenotype.CopyNumberB, maximumCnState));
                            if (!topLikelihoods[offspringIds[counter]].TryGetValue(copyNumberGenotypeChild, out childLikelihoods[counter]))
                            {
                                break;
                            }
                            totalCopyNumberGenotypes.Add(copyNumberGenotypeChild);
                        }
                        if (totalCopyNumberGenotypes.Count < offspringCount)
                        {
                            // unavailable total CN
                            continue;
                        }

                        // For a given combination of offspring copy numbers, only the genotypes that result in the maximum likelihood contribute to the final result.
                        double currentLikelihood = copyNumberParent1.Value * copyNumberParent2.Value;
                        for (var counter = 0; counter < offspringCount; counter++)
                        {
                            var phasedGenotype = offspringGtStates[counter].PhasedGenotype;
                            currentLikelihood *= pedigreeInfo.TransitionMatrix[copyNumberParent1.Key.TotalCopyNumber][phasedGenotype.CopyNumberA] *
                                                 pedigreeInfo.TransitionMatrix[copyNumberParent2.Key.TotalCopyNumber][phasedGenotype.CopyNumberB] *
                                                 childLikelihoods[counter];
                        }
                        // non-finite products are treated as zero likelihood
                        currentLikelihood = Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood) ? 0 : currentLikelihood;

                        var genotypesInPedigree = new SampleMap <Genotype>
                        {
                            { parent1Id, copyNumberParent1.Key },
                            { parent2Id, copyNumberParent2.Key }
                        };
                        for (var counter = 0; counter < offspringCount; counter++)
                        {
                            genotypesInPedigree.Add(offspringIds[counter], totalCopyNumberGenotypes[counter]);
                        }
                        genotypesInPedigree = genotypesInPedigree.OrderBy(pedigreeInfo.AllSampleIds);
                        jointLikelihood.AddJointLikelihood(genotypesInPedigree, currentLikelihood);

                        // keep the assignment with the highest log likelihood seen so far
                        double currentLogLikelihood = Math.Log(currentLikelihood);
                        if (currentLogLikelihood > jointLikelihood.MaximalLogLikelihood)
                        {
                            jointLikelihood.MaximalLogLikelihood = currentLogLikelihood;
                            sampleCopyNumbersGenotypes           = genotypesInPedigree;
                        }
                    }
                }
            }
            if (sampleCopyNumbersGenotypes.Empty())
            {
                throw new IlluminaException("Maximal likelihood was not found");
            }
            return(sampleCopyNumbersGenotypes, jointLikelihood);
        }
示例#26
0
        /// <summary>
        /// Picks, independently for every sample in <paramref name="segments"/>, the genotype with the largest
        /// single-sample likelihood and adds the logarithm of that likelihood to the joint maximal log likelihood.
        /// </summary>
        /// <param name="segments">Supplies the sample IDs to process.</param>
        /// <param name="singleSampleLikelihoods">Per-sample likelihood of every genotype.</param>
        /// <returns>The selected genotype per sample and a likelihood object carrying only the maximal log likelihood.</returns>
        public static (SampleMap <Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetCopyNumbersNoPedigreeInfo(ISampleMap <CanvasSegment> segments,
                                                                                                                                 ISampleMap <Dictionary <Genotype, double> > singleSampleLikelihoods)
        {
            // only the maximum likelihood is recorded for non-pedigree samples; no per-combination entries are added
            // NOTE(review): the sum starts from whatever value JointLikelihoods() gives MaximalLogLikelihood; confirm it is finite.
            var likelihoodSummary = new JointLikelihoods();
            var bestGenotypes     = new SampleMap <Genotype>();

            foreach (var sampleId in segments.SampleIds)
            {
                var (bestGenotype, bestLikelihood) = singleSampleLikelihoods[sampleId].MaxBy(entry => entry.Value);

                double bestLogLikelihood = Math.Log(bestLikelihood);
                likelihoodSummary.MaximalLogLikelihood += bestLogLikelihood;
                bestGenotypes.Add(sampleId, bestGenotype);
            }

            return (bestGenotypes, likelihoodSummary);
        }
示例#27
0
 /// <summary>
 /// Tells whether the Q-score of the given sample's segment reaches the quality filter threshold.
 /// </summary>
 /// <param name="canvasSegments">One segment per sample.</param>
 /// <param name="sampleId">Sample whose segment is checked.</param>
 private bool IsPassVariant(ISampleMap <CanvasSegment> canvasSegments, SampleId sampleId)
     => canvasSegments[sampleId].QScore >= _qualityFilterThreshold;
示例#28
0
        /// <summary>
        /// Tells whether the capped copy-number state of the sample's segment equals the ploidy that the
        /// sample's metrics report for that segment.
        /// </summary>
        /// <param name="canvasSegments">One segment per sample.</param>
        /// <param name="samplesInfo">Per-sample metrics providing the ploidy.</param>
        /// <param name="sampleId">Sample to check.</param>
        private bool IsReferenceVariant(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo, SampleId sampleId)
        {
            var sampleSegment  = canvasSegments[sampleId];
            var cnState        = GetCnState(canvasSegments, sampleId, _callerParameters.MaximumCopyNumber);
            var expectedPloidy = samplesInfo[sampleId].GetPloidy(sampleSegment);

            return cnState == expectedPloidy;
        }
示例#29
0
 /// <summary>
 /// Computes a hash code for a genotype sample map by summing, onto a seed of 17, the hash code of each
 /// enumerated element multiplied by 31.
 /// </summary>
 /// <param name="obj">Map to hash.</param>
 /// <returns>The combined hash code; element order does not influence the result.</returns>
 public int GetHashCode(ISampleMap <Genotype> obj)
 {
     // unchecked: the sum is meant to wrap around; without it a build compiled with overflow
     // checking would throw OverflowException from a method that must not fail on valid input
     return(obj.Aggregate(17, (hash, value) => unchecked(hash + value.GetHashCode() * 31)));
 }
示例#30
0
        /// <summary>
        /// Creates CanvasSegments from a common CNVs bed file and overlaps them with the CanvasPartition
        /// segments to build the per-sample overlapping segment regions.
        /// </summary>
        /// <param name="variantFrequencyFiles">Path of the variant frequency file of every sample.</param>
        /// <param name="defaultAlleleCountThreshold">Allele count threshold forwarded to <c>GetSegmentSets</c>.</param>
        /// <param name="commonCNVsbedPath">Path of the common CNVs bed file; when null every segment becomes its own region.</param>
        /// <param name="sampleSegments">Partitioned segments of every sample.</param>
        /// <returns>The per-sample region lists converted by <c>GetOverlappingSegmentsRegionSampleLists</c>.</returns>
        /// <exception cref="ArgumentException">The chromosome name check against the bed file fails.</exception>
        private IEnumerable <ISampleMap <OverlappingSegmentsRegion> > CreateSegmentSetsFromCommonCnvs(ISampleMap <string> variantFrequencyFiles,
                                                                                                      int defaultAlleleCountThreshold, string commonCNVsbedPath, ISampleMap <Segments> sampleSegments)
        {
            if (commonCNVsbedPath == null)
            {
                // no common CNVs supplied: wrap each partitioned segment in its own region
                var defaultSampleRegions = sampleSegments
                                           .SelectValues(segments => segments.AllSegments.Select(segment => new OverlappingSegmentsRegion(segment)).ToList());
                return(GetOverlappingSegmentsRegionSampleLists(defaultSampleRegions));
            }

            var commonRegions = ReadCommonRegions(commonCNVsbedPath);
            // chromosomes and genomic bins are taken from the first sample only
            var referenceSegments = sampleSegments.Values.First();
            var chromosomes       = referenceSegments.GetChromosomes();

            // NOTE(review): the exception is raised when the helper returns true, although its name suggests
            // that true means "identical"; confirm the helper's return semantics.
            if (IsIdenticalChromosomeNames(commonRegions, chromosomes))
            {
                throw new ArgumentException(
                          $"Chromosome names in a common CNVs bed file {commonCNVsbedPath} does not match the genome reference");
            }

            var segmentIntervalsByChromosome = new Dictionary <string, List <BedInterval> >();
            var genomicBinsByChromosome      = new Dictionary <string, IReadOnlyList <SampleGenomicBin> >();
            var dictionariesGate             = new object();

            Parallel.ForEach(
                chromosomes,
                chr =>
            {
                // the expensive work runs in parallel, outside the lock
                IReadOnlyList <SampleGenomicBin> genomicBins = referenceSegments.GetGenomicBinsForChromosome(chr);
                List <BedInterval> segmentIntervals          = CanvasSegment.RemapGenomicToBinCoordinates(commonRegions[chr], genomicBins);

                // Dictionary is not safe for concurrent writers, so the inserts are serialised
                lock (dictionariesGate)
                {
                    genomicBinsByChromosome[chr]      = genomicBins;
                    segmentIntervalsByChromosome[chr] = segmentIntervals;
                }
            });

            var sampleRegions = new SampleMap <List <OverlappingSegmentsRegion> >();

            foreach (var sampleId in sampleSegments.SampleIds)
            {
                // allele frequencies are read per sample, restricted to the common CNV intervals
                var commonIntervals = commonRegions.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.Select(bedEntry => bedEntry.Interval).ToList());
                var allelesByChromosomeCommonSegs = CanvasIO.ReadFrequenciesWrapper(_logger,
                                                                                    new FileLocation(variantFrequencyFiles[sampleId]), commonIntervals);
                var segmentsSets = GetSegmentSets(defaultAlleleCountThreshold, commonRegions,
                                                  genomicBinsByChromosome, segmentIntervalsByChromosome, allelesByChromosomeCommonSegs, sampleSegments[sampleId]);
                sampleRegions.Add(sampleId, segmentsSets);
            }

            return(GetOverlappingSegmentsRegionSampleLists(sampleRegions));
        }