Esempio n. 1
0
 private void EstimateQScores(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> pedigreeMembersInfo,
                              PedigreeInfo pedigreeInfo, ISampleMap <Dictionary <Genotype, double> > singleSampleLikelihoods, JointLikelihoods copyNumberLikelihoods, ISampleMap <Genotype> copyNumbers)
 {
     foreach (var sampleId in canvasSegments.SampleIds)
     {
         canvasSegments[sampleId].QScore     = GetSingleSampleQualityScore(singleSampleLikelihoods[sampleId], copyNumbers[sampleId]);
         canvasSegments[sampleId].CopyNumber = copyNumbers[sampleId].TotalCopyNumber;
         if (canvasSegments[sampleId].QScore < _qualityFilterThreshold)
         {
             canvasSegments[sampleId].Filter = CanvasFilter.Create(new[] { $"q{_qualityFilterThreshold}" });
         }
     }
     if (pedigreeInfo.HasFullPedigree())
     {
         SetDenovoQualityScores(canvasSegments, pedigreeMembersInfo, pedigreeInfo.ParentsIds, pedigreeInfo.OffspringIds, copyNumberLikelihoods);
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Identify variant with the highest likelihood at a given setPosition and assign relevant scores
        /// </summary>
        public void CallVariant(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                ISampleMap <ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
        {
            var singleSampleLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo, copyNumberModel, _callerParameters.NumberOfTrimmedBins);

            (var pedigreeCopyNumbers, var pedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);

            var nonPedigreeCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, singleSampleLikelihoods);

            var mergedCopyNumbers = pedigreeCopyNumbers.Concat(nonPedigreeCopyNumbers).OrderBy(canvasSegments.SampleIds);

            EstimateQScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods, pedigreeLikelihoods, mergedCopyNumbers);
            // TODO: this will be integrated with GetCopyNumbers* on a model level as a part of https://jira.illumina.com/browse/CANV-404
            if (CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments, _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment) &&
                pedigreeInfo.HasFullPedigree())
            {
                AssignMccWithPedigreeInfo(canvasSegments, copyNumberModel, pedigreeInfo);
            }
            if (CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments, _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment) &&
                pedigreeInfo.HasOther())
            {
                AssignMccNoPedigreeInfo(canvasSegments.Where(segment => pedigreeInfo.OtherIds.Contains(segment.SampleId)).ToSampleMap(), copyNumberModel, _genotypes);
            }
        }
Esempio n. 3
0
        public void CallVariant(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                ISampleMap <ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
        {
            var coverageLikelihoods = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo, copyNumberModel);
            // if number and properties of SNPs in the segment are above threshold, calculate likelihood from SNPs and merge with
            // coverage likelihood to form merged likelihoods
            int nBalleles = canvasSegments.Values.First().Balleles.Size();
            // If allele information is available (i.e. segment has enough SNPs) merge coverage and allele likelihood obtained by GetGenotypeLogLikelihoods
            // into singleSampleLikelihoods using JoinLikelihoods function.
            var singleSampleLikelihoods = CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments,
                                                                                          _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment)
                ? JoinLikelihoods(GetGenotypeLogLikelihoods(canvasSegments, copyNumberModel, _PhasedGenotypes), coverageLikelihoods, nBalleles)
                : ConvertToLogLikelihood(coverageLikelihoods);

            // estimate joint likelihood across pedigree samples from singleSampleLikelihoods using either only coverage or coverage + allele counts
            (var pedigreeCopyNumbers, var pedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);

            var nonPedigreeCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, singleSampleLikelihoods);

            var mergedCopyNumbers = nonPedigreeCopyNumbers.Concat(pedigreeCopyNumbers).OrderBy(canvasSegments.SampleIds);

            AssignCNandScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods,
                              pedigreeLikelihoods, mergedCopyNumbers);
        }
Esempio n. 4
0
        internal int CallVariants(List <string> variantFrequencyFiles, List <string> segmentFiles,
                                  IFileLocation outVcfFile, string ploidyBedPath, string referenceFolder, List <string> sampleNames, string commonCnvsBedPath, List <SampleType> sampleTypes)
        {
            // load files
            // initialize data structures and classes
            var fileCounter      = 0;
            var samplesInfo      = new SampleMap <SampleMetrics>();
            var sampleSegments   = new SampleMap <Segments>();
            var copyNumberModels = new SampleMap <ICopyNumberModel>();
            var variantFrequencyFilesSampleList = new SampleMap <string>();
            var kinships = new SampleMap <SampleType>();

            foreach (string sampleName in sampleNames)
            {
                var sampleId = new SampleId(sampleName);
                var segment  = Segments.ReadSegments(_logger, new FileLocation(segmentFiles[fileCounter]));
                segment.AddAlleles(CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFiles[fileCounter]), segment.IntervalsByChromosome));
                sampleSegments.Add(sampleId, segment);
                var sampleInfo      = SampleMetrics.GetSampleInfo(segment.AllSegments, ploidyBedPath, _callerParameters.NumberOfTrimmedBins, sampleId);
                var copyNumberModel = _copyNumberModelFactory.CreateModel(_callerParameters.MaximumCopyNumber, sampleInfo.MaxCoverage, sampleInfo.MeanCoverage, sampleInfo.MeanMafCoverage);
                samplesInfo.Add(sampleId, sampleInfo);
                copyNumberModels.Add(sampleId, copyNumberModel);
                variantFrequencyFilesSampleList.Add(sampleId, variantFrequencyFiles[fileCounter]);
                kinships.Add(sampleId, sampleTypes[fileCounter]);
                fileCounter++;
            }
            var segmentSetsFromCommonCnvs = CreateSegmentSetsFromCommonCnvs(variantFrequencyFilesSampleList,
                                                                            _callerParameters.MinAlleleCountsThreshold, commonCnvsBedPath, sampleSegments);

            var          segmentsForVariantCalling = GetHighestLikelihoodSegments(segmentSetsFromCommonCnvs, samplesInfo, copyNumberModels).ToList();
            PedigreeInfo pedigreeInfo = PedigreeInfo.GetPedigreeInfo(kinships, _callerParameters);

            Parallel.ForEach(
                segmentsForVariantCalling,
                new ParallelOptions
            {
                MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber)
            },
                segments => _variantCaller.CallVariant(segments, samplesInfo, copyNumberModels, pedigreeInfo)
                );
            var variantCalledSegments = new SampleMap <List <CanvasSegment> >();

            foreach (var key in samplesInfo.SampleIds)
            {
                variantCalledSegments.Add(key, segmentsForVariantCalling.Select(segment => segment[key]).ToList());
            }

            var mergedVariantCalledSegments = MergeSegments(variantCalledSegments, _callerParameters.MinimumCallSize, _qualityFilterThreshold);

            FilterExcessivelyShortSegments(mergedVariantCalledSegments);

            var outputFolder = outVcfFile.Directory;

            foreach (var sampleId in samplesInfo.SampleIds)
            {
                var coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(outputFolder,
                                                                                                  sampleId.ToString());
                CanvasSegment.WriteCoveragePlotData(mergedVariantCalledSegments[sampleId], samplesInfo[sampleId].MeanCoverage,
                                                    samplesInfo[sampleId].Ploidy, coverageOutputPath, referenceFolder);
            }
            bool isPedigreeInfoSupplied = pedigreeInfo != null && pedigreeInfo.HasFullPedigree();
            var  denovoQualityThreshold = isPedigreeInfoSupplied ? (int?)_deNovoQualityFilterThreshold : null;
            var  ploidies        = samplesInfo.Select(info => info.Value.Ploidy).ToList();
            var  diploidCoverage = samplesInfo.Select(info => info.Value.MeanCoverage).ToList();
            var  names           = samplesInfo.SampleIds.Select(id => id.ToString()).ToList();

            CanvasSegmentWriter.WriteMultiSampleSegments(outVcfFile.FullName, mergedVariantCalledSegments, diploidCoverage, referenceFolder, names,
                                                         null, ploidies, _qualityFilterThreshold, denovoQualityThreshold, CanvasFilter.SegmentSizeCutoff, isPedigreeInfoSupplied);

            foreach (var sampleId in samplesInfo.SampleIds)
            {
                var outputVcfPath = SingleSampleCallset.GetVcfOutput(outputFolder, sampleId.ToString());
                var sampleMetrics = samplesInfo[sampleId];
                var segments      = mergedVariantCalledSegments[sampleId];
                CanvasSegmentWriter.WriteSegments(outputVcfPath.FullName, segments,
                                                  sampleMetrics.MeanCoverage, referenceFolder, sampleId.ToString(), null,
                                                  sampleMetrics.Ploidy, _qualityFilterThreshold, isPedigreeInfoSupplied, denovoQualityThreshold, null);

                var visualizationTemp   = outputFolder.CreateSubdirectory($"VisualizationTemp{sampleId}");
                var normalizationFactor = NormalizationCalculator.ComputeNormalizationFactor(segments);
                var bigWig = _coverageBigWigWriter.Write(segments, visualizationTemp, normalizationFactor);
                bigWig?.MoveTo(SingleSampleCallset.GetCoverageBigWig(outputFolder, sampleId.ToString()));
                var copyNumberBedGraph = SingleSampleCallset.GetCopyNumberBedGraph(outputFolder, sampleId.ToString());
                _copyNumberBedGraphWriter.Write(segments, sampleMetrics.Ploidy, copyNumberBedGraph);

                var partitionBedgraphHeader = "track type=bedGraph visibility=full autoScale=on graphType=points";
                var originalSegments        = sampleSegments[sampleId];
                _partitionCoverageBedGraphWriter.Write(originalSegments.AllSegments, SingleSampleCallset.GetPartitionBedGraph(outputFolder, sampleId.ToString()), normalizationFactor, partitionBedgraphHeader);
            }
            return(0);
        }
Esempio n. 5
0
        public static SampleMap <Genotype> GetNonPedigreeCopyNumbers(ISampleMap <CanvasSegment> canvasSegments, PedigreeInfo pedigreeInfo,
                                                                     ISampleMap <Dictionary <Genotype, double> > singleSampleCopyNumberLogLikelihoods)
        {
            bool IsOther(SampleId sampleId) => pedigreeInfo.OtherIds.Contains(sampleId);

            var nonPedigreeMemberSegments    = canvasSegments.WhereSampleIds(IsOther);
            var nonPedigreeMemberLikelihoods = singleSampleCopyNumberLogLikelihoods.WhereSampleIds(IsOther);

            (var nonPedigreeMemberCopyNumbers, _) = GetCopyNumbersNoPedigreeInfo(nonPedigreeMemberSegments, nonPedigreeMemberLikelihoods);
            return(nonPedigreeMemberCopyNumbers);
        }
Esempio n. 6
0
        /// <summary>
        /// Estimate joint likelihood and most likely CN assignment within a pedigree using total CN Genotype likelihoods and transition matrix
        /// </summary>
        /// <param name="pedigreeInfo"></param>
        /// <param name="copyNumbersLikelihoods"></param>
        /// <returns></returns>
        private (ISampleMap <Genotype> copyNumbersGenotypes, JointLikelihoods jointLikelihood) GetPedigreeCopyNumbers(PedigreeInfo pedigreeInfo, ISampleMap <Dictionary <Genotype, double> > copyNumbersLikelihoods)
        {
            int nHighestLikelihoodGenotypes = pedigreeInfo != null && pedigreeInfo.OffspringIds.Count >= 2 ? 3 : _callerParameters.MaximumCopyNumber;

            copyNumbersLikelihoods = copyNumbersLikelihoods.SelectValues(l => l.OrderByDescending(kvp => kvp.Value).Take(nHighestLikelihoodGenotypes).ToDictionary());

            var sampleCopyNumbersGenotypes = new SampleMap <Genotype>();
            var jointLikelihood            = new JointLikelihoods();

            if (!pedigreeInfo.HasFullPedigree())
            {
                return(sampleCopyNumbersGenotypes, jointLikelihood);
            }
            // parent 1 total CNs and likelihoods
            foreach (var copyNumberParent1 in copyNumbersLikelihoods[pedigreeInfo.ParentsIds.First()])
            {
                // parent 2 total CNs and likelihoods
                foreach (var copyNumberParent2 in copyNumbersLikelihoods[pedigreeInfo.ParentsIds.Last()])
                {
                    // for offspring in addition to querying likelihoods using total CNs, iterate over all possible genotype combination (CopyNumberA/B) for a given
                    // CN and estimate likely transition probabilities using TransitionMatrix
                    foreach (var offspringGtStates in pedigreeInfo.OffspringPhasedGenotypes)
                    {
                        if (!pedigreeInfo.OffspringIds.All(id => copyNumbersLikelihoods[id].ContainsKey(
                                                               Genotype.Create(Math.Min(offspringGtStates[pedigreeInfo.OffspringIds.IndexOf(id)].PhasedGenotype.CopyNumberA + offspringGtStates[pedigreeInfo.OffspringIds.IndexOf(id)].PhasedGenotype.CopyNumberB,
                                                                                        _callerParameters.MaximumCopyNumber - 1)))))
                        {
                            // unavailable total CN
                            continue;
                        }
                        // For a given combination of offspring copy numbers, only the genotypes that result in the maximum likelihood contribute to the final result."
                        double currentLikelihood        = copyNumberParent1.Value * copyNumberParent2.Value;
                        var    totalCopyNumberGenotypes = new List <Genotype>();
                        for (var counter = 0; counter < pedigreeInfo.OffspringIds.Count; counter++)
                        {
                            var child = pedigreeInfo.OffspringIds[counter];
                            var copyNumberGenotypeChild = Genotype.Create(Math.Min(offspringGtStates[counter].PhasedGenotype.CopyNumberA + offspringGtStates[counter].PhasedGenotype.CopyNumberB,
                                                                                   _callerParameters.MaximumCopyNumber - 1));
                            totalCopyNumberGenotypes.Add(copyNumberGenotypeChild);
                            currentLikelihood *= pedigreeInfo.TransitionMatrix[copyNumberParent1.Key.TotalCopyNumber][offspringGtStates[counter].PhasedGenotype.CopyNumberA] *
                                                 pedigreeInfo.TransitionMatrix[copyNumberParent2.Key.TotalCopyNumber][offspringGtStates[counter].PhasedGenotype.CopyNumberB] *
                                                 copyNumbersLikelihoods[child][copyNumberGenotypeChild];
                        }
                        currentLikelihood = Double.IsNaN(currentLikelihood) || Double.IsInfinity(currentLikelihood) ? 0 : currentLikelihood;

                        var genotypesInPedigree = new SampleMap <Genotype>
                        {
                            { pedigreeInfo.ParentsIds.First(), copyNumberParent1.Key },
                            { pedigreeInfo.ParentsIds.Last(), copyNumberParent2.Key }
                        };
                        pedigreeInfo.OffspringIds.Zip(totalCopyNumberGenotypes).ForEach(sampleIdGenotypeKvp => genotypesInPedigree.Add(sampleIdGenotypeKvp.Item1, sampleIdGenotypeKvp.Item2));
                        genotypesInPedigree = genotypesInPedigree.OrderBy(pedigreeInfo.AllSampleIds);
                        jointLikelihood.AddJointLikelihood(genotypesInPedigree, currentLikelihood);
                        double currentLogLikelihood = Math.Log(currentLikelihood);
                        if (currentLogLikelihood > jointLikelihood.MaximalLogLikelihood)
                        {
                            jointLikelihood.MaximalLogLikelihood = currentLogLikelihood;
                            sampleCopyNumbersGenotypes           = genotypesInPedigree;
                        }
                    }
                }
            }
            if (sampleCopyNumbersGenotypes.Empty())
            {
                throw new IlluminaException("Maximal likelihood was not found");
            }
            return(sampleCopyNumbersGenotypes, jointLikelihood);
        }
Esempio n. 7
0
        /// <summary>
        /// Calculates maximal likelihood for genotypes given a copy number call. Updated MajorChromosomeCount.
        /// </summary>
        private void AssignMccWithPedigreeInfo(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <ICopyNumberModel> model, PedigreeInfo pedigreeInfo)
        {
            double maximalLogLikelihood = Double.NegativeInfinity;
            int    parent1CopyNumber    = canvasSegments[pedigreeInfo.ParentsIds.First()].CopyNumber;
            int    parent2CopyNumber    = canvasSegments[pedigreeInfo.ParentsIds.Last()].CopyNumber;

            foreach (var parent1GtStates in _genotypes[parent1CopyNumber])
            {
                foreach (var parent2GtStates in _genotypes[parent2CopyNumber])
                {
                    var    bestChildGtStates    = new List <PhasedGenotype>();
                    double currentLogLikelihood = 0;
                    foreach (SampleId child in pedigreeInfo.OffspringIds)
                    {
                        int            childCopyNumber   = canvasSegments[child].CopyNumber;
                        bool           isInheritedCnv    = !canvasSegments[child].DqScore.HasValue;
                        double         bestLogLikelihood = Double.NegativeInfinity;
                        PhasedGenotype bestGtState       = null;
                        bestLogLikelihood = GetProbandLogLikelihood(model[child], childCopyNumber,
                                                                    parent1GtStates, parent2GtStates, isInheritedCnv, canvasSegments[child], bestLogLikelihood, ref bestGtState);
                        bestChildGtStates.Add(bestGtState);
                        currentLogLikelihood += bestLogLikelihood;
                    }
                    currentLogLikelihood += GetCurrentGtLogLikelihood(model[pedigreeInfo.ParentsIds.First()], canvasSegments[pedigreeInfo.ParentsIds.First()], parent1GtStates) +
                                            GetCurrentGtLogLikelihood(model[pedigreeInfo.ParentsIds.Last()], canvasSegments[pedigreeInfo.ParentsIds.Last()], parent2GtStates);

                    currentLogLikelihood = Double.IsNaN(currentLogLikelihood) || Double.IsInfinity(currentLogLikelihood)
                        ? Double.NegativeInfinity
                        : currentLogLikelihood;

                    if (currentLogLikelihood > maximalLogLikelihood)
                    {
                        maximalLogLikelihood = currentLogLikelihood;
                        AssignMcc(canvasSegments[pedigreeInfo.ParentsIds.First()], model[pedigreeInfo.ParentsIds.First()], parent1GtStates, parent1CopyNumber);
                        AssignMcc(canvasSegments[pedigreeInfo.ParentsIds.Last()], model[pedigreeInfo.ParentsIds.Last()], parent2GtStates, parent2CopyNumber);
                        for (int childIndex = 0; childIndex < pedigreeInfo.OffspringIds.Count; childIndex++)
                        {
                            var childId          = pedigreeInfo.OffspringIds[childIndex];
                            var bestChildGtState = bestChildGtStates[childIndex];
                            if (bestChildGtState == null)
                            {
                                continue;
                            }
                            var childSegment = canvasSegments[childId];
                            AssignMcc(childSegment, model[childId], bestChildGtState, childSegment.CopyNumber);
                        }
                    }
                }
            }
        }