Exemple #1
0
        /// <summary>
        /// Writes MajorChromosomeCount and MajorChromosomeCountScore on <paramref name="canvasSegment"/>
        /// for the supplied genotype and copy number.
        /// </summary>
        /// <param name="canvasSegment">Segment whose MCC fields are overwritten.</param>
        /// <param name="copyNumberModel">Model forwarded to GetGtLogLikelihoodScore when scoring a gain.</param>
        /// <param name="gtStates">Genotype whose larger haplotype copy number becomes the MCC for gains.</param>
        /// <param name="copyNumber">Total copy number used to pick the branch and the candidate genotype list.</param>
        private void AssignMcc(CanvasSegment canvasSegment, ICopyNumberModel copyNumberModel,
                               PhasedGenotype gtStates, int copyNumber)
        {
            const int diploidCopyNumber = 2;

            if (copyNumber <= diploidCopyNumber)
            {
                // variant caller does not attempt to call LOH, for DELs CN=MCC
                canvasSegment.MajorChromosomeCount =
                    copyNumber == diploidCopyNumber ? (int?)null : copyNumber;
                canvasSegment.MajorChromosomeCountScore = null;
                return;
            }

            // Gain: the MCC is the larger of the two haplotype copy numbers.
            canvasSegment.MajorChromosomeCount =
                Math.Max(gtStates.CopyNumberA, gtStates.CopyNumberB);

            // Score the chosen genotype against all genotypes registered for this copy number.
            var candidateGenotypes = _genotypes[copyNumber];
            int? genotypeIndex = candidateGenotypes.IndexOf(gtStates);
            canvasSegment.MajorChromosomeCountScore = GetGtLogLikelihoodScore(
                canvasSegment.Balleles, candidateGenotypes, ref genotypeIndex, copyNumberModel);
        }
Exemple #2
0
 /// <summary>
 /// Returns true when at least one haplotype copy number of the child genotype
 /// equals one of the haplotype copy numbers of the parent genotype.
 /// </summary>
 private bool IsGtPedigreeConsistent(PhasedGenotype parentGtStates, PhasedGenotype childGtStates)
 {
     // Child haplotype A matches either parental haplotype.
     bool childAMatchesParent = parentGtStates.CopyNumberA == childGtStates.CopyNumberA ||
                                parentGtStates.CopyNumberB == childGtStates.CopyNumberA;
     if (childAMatchesParent)
     {
         return true;
     }

     // Otherwise child haplotype B must match either parental haplotype.
     return parentGtStates.CopyNumberA == childGtStates.CopyNumberB ||
            parentGtStates.CopyNumberB == childGtStates.CopyNumberB;
 }
Exemple #3
0
        private static ISampleMap <Dictionary <PhasedGenotype, double> > GetGenotypeLogLikelihoods(ISampleMap <CanvasSegment> canvasSegments,
                                                                                                   ISampleMap <ICopyNumberModel> copyNumberModel, List <PhasedGenotype> genotypes)
        {
            var REF = new PhasedGenotype(1, 1);
            var loh = new List <PhasedGenotype> {
                new PhasedGenotype(0, 2), new PhasedGenotype(2, 0)
            };

            var singleSampleLikelihoods = new SampleMap <Dictionary <PhasedGenotype, double> >();

            foreach (var sampleId in canvasSegments.SampleIds)
            {
                var logLikelihoods = genotypes.Select(genotype => (genotype, copyNumberModel[sampleId].
Exemple #4
0
        /// <summary>
        /// Evaluates every combination of parental genotypes allowed by the parents' copy numbers,
        /// picks for each child the best genotype returned by GetProbandLogLikelihood, and calls
        /// AssignMcc on parents and children each time a combination improves on the best joint
        /// log-likelihood seen so far.
        /// </summary>
        /// <param name="canvasSegments">Per-sample segments; their MajorChromosomeCount fields are updated via AssignMcc.</param>
        /// <param name="model">Per-sample copy number models used for the genotype log-likelihoods.</param>
        /// <param name="pedigreeInfo">Supplies the parent ids (first and last of ParentsIds) and the offspring ids.</param>
        private void AssignMccWithPedigreeInfo(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <ICopyNumberModel> model, PedigreeInfo pedigreeInfo)
        {
            var firstParentId       = pedigreeInfo.ParentsIds.First();
            var firstParentSegment  = canvasSegments[firstParentId];
            int firstParentCn       = firstParentSegment.CopyNumber;
            var secondParentId      = pedigreeInfo.ParentsIds.Last();
            var secondParentSegment = canvasSegments[secondParentId];
            int secondParentCn      = secondParentSegment.CopyNumber;

            double bestJointLogLikelihood = Double.NegativeInfinity;

            foreach (var firstParentGt in _genotypes[firstParentCn])
            {
                foreach (var secondParentGt in _genotypes[secondParentCn])
                {
                    // Best genotype per child (null when none was selected), in OffspringIds order.
                    var    childGenotypes     = new List <PhasedGenotype>();
                    double jointLogLikelihood = 0;
                    foreach (SampleId childId in pedigreeInfo.OffspringIds)
                    {
                        var  childSegment = canvasSegments[childId];
                        int  childCn      = childSegment.CopyNumber;
                        // A child segment without a DQ score is treated as an inherited CNV.
                        bool inherited    = !canvasSegments[childId].DqScore.HasValue;

                        PhasedGenotype childGt = null;
                        double childLogLikelihood = GetProbandLogLikelihood(model[childId], childCn,
                                                                            firstParentGt, secondParentGt, inherited, canvasSegments[childId],
                                                                            Double.NegativeInfinity, ref childGt);
                        childGenotypes.Add(childGt);
                        jointLogLikelihood += childLogLikelihood;
                    }

                    // Parental terms are summed together first, then added to the children's total.
                    double firstParentLogLikelihood  = GetCurrentGtLogLikelihood(model[firstParentId], firstParentSegment, firstParentGt);
                    double secondParentLogLikelihood = GetCurrentGtLogLikelihood(model[secondParentId], secondParentSegment, secondParentGt);
                    jointLogLikelihood += firstParentLogLikelihood + secondParentLogLikelihood;

                    // Any NaN or infinite total is collapsed to negative infinity so it can never win.
                    if (Double.IsNaN(jointLogLikelihood) || Double.IsInfinity(jointLogLikelihood))
                    {
                        jointLogLikelihood = Double.NegativeInfinity;
                    }

                    if (!(jointLogLikelihood > bestJointLogLikelihood))
                    {
                        continue;
                    }

                    bestJointLogLikelihood = jointLogLikelihood;
                    AssignMcc(firstParentSegment, model[firstParentId], firstParentGt, firstParentCn);
                    AssignMcc(secondParentSegment, model[secondParentId], secondParentGt, secondParentCn);
                    for (int i = 0; i < pedigreeInfo.OffspringIds.Count; i++)
                    {
                        var selectedChildGt = childGenotypes[i];
                        if (selectedChildGt == null)
                        {
                            // No genotype was chosen for this child; leave its segment untouched.
                            continue;
                        }
                        var offspringId      = pedigreeInfo.OffspringIds[i];
                        var offspringSegment = canvasSegments[offspringId];
                        AssignMcc(offspringSegment, model[offspringId], selectedChildGt, offspringSegment.CopyNumber);
                    }
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Checks that VariantCaller.GetGtLogLikelihoodScore selects the expected CN=3 genotype
        /// index for two sets of observed allele counts.
        /// </summary>
        public void TestGetGtLogLikelihoodScore()
        {
            const int simulatedCn = 3;

            var modelFactory = new HaplotypeCopyNumberModelFactory();
            var model        = modelFactory.CreateModel(numCnStates: 5, maxCoverage: 200,
                                                        meanCoverage: 100, diploidAlleleMeanCounts: 50.0);
            // Candidate genotypes restricted to the simulated total copy number.
            var candidateGenotypes = PedigreeInfo.GeneratePhasedGenotype(numCnStates: 5)
                                     .Where(gt => gt.TotalCopyNumber == simulatedCn)
                                     .Select(gt => gt.PhasedGenotype)
                                     .ToList();

            // Case 1: counts expected to select the (3, 0) genotype.
            var skewedCounts = new Balleles(new List <Ballele>
            {
                new Ballele(1, 1, 73),
                new Ballele(100, 2, 74),
                new Ballele(200, 1, 76),
                new Ballele(300, 0, 74),
                new Ballele(400, 1, 75),
                new Ballele(500, 2, 74)
            });
            var expectedGt3_0 = new PhasedGenotype(3, 0);
            int? selectedGtState = null;
            // variant caller only calls MCC, only upper triangle of CN genotypes is selected - i.e. CNa=3,CNb=0 from [CNa=3,CNb=0,CNa=0,CNb=3]
            VariantCaller.GetGtLogLikelihoodScore(skewedCounts, candidateGenotypes, ref selectedGtState, model);

            Assert.Equal(candidateGenotypes.IndexOf(expectedGt3_0), selectedGtState);

            // Case 2: counts expected to select the (2, 1) genotype.
            var mixedCounts = new Balleles(new List <Ballele>
            {
                new Ballele(1, 23, 53),
                new Ballele(100, 22, 54),
                new Ballele(200, 25, 46),
                new Ballele(300, 24, 50),
                new Ballele(400, 26, 51),
                new Ballele(500, 24, 51)
            });
            var expectedGt2_1 = new PhasedGenotype(2, 1);

            selectedGtState = null;
            VariantCaller.GetGtLogLikelihoodScore(mixedCounts, candidateGenotypes, ref selectedGtState, model);

            Assert.Equal(candidateGenotypes.IndexOf(expectedGt2_1), selectedGtState);
        }
Exemple #6
0
 /// <summary>
 /// Scans the genotypes registered for <paramref name="childCopyNumber"/> and keeps the one with the
 /// highest log-likelihood among those consistent with both parental genotypes. Candidates are only
 /// scored when <paramref name="isInheritedCnv"/> is true.
 /// </summary>
 /// <param name="bestLogLikelihood">Starting value that a candidate must exceed to be selected.</param>
 /// <param name="bestGtState">Overwritten with the selected genotype; left unchanged when none is selected.</param>
 /// <returns>The highest log-likelihood found, or the incoming <paramref name="bestLogLikelihood"/> if none exceeded it.</returns>
 private double GetProbandLogLikelihood(ICopyNumberModel copyNumberModel, int childCopyNumber, PhasedGenotype parent1GtStates, PhasedGenotype parent2GtStates, bool isInheritedCnv, CanvasSegment canvasSegment,
                                        double bestLogLikelihood, ref PhasedGenotype bestGtState)
 {
     foreach (var candidate in _genotypes[childCopyNumber])
     {
         // Skip candidates that are inconsistent with either parent, or when the CNV is not inherited.
         if (!IsGtPedigreeConsistent(parent1GtStates, candidate) ||
             !IsGtPedigreeConsistent(parent2GtStates, candidate) ||
             !isInheritedCnv)
         {
             continue;
         }

         double candidateLogLikelihood = copyNumberModel.GetGenotypeLogLikelihood(canvasSegment.Balleles, candidate);
         if (candidateLogLikelihood > bestLogLikelihood)
         {
             bestGtState       = candidate;
             bestLogLikelihood = candidateLogLikelihood;
         }
     }
     return bestLogLikelihood;
 }
        /// <summary>
        /// Sums, over the truncated observed allele counts, the log of the per-locus likelihood of
        /// those counts under the phased genotype <paramref name="gtModelCount"/>.
        /// </summary>
        /// <param name="gtObservedCounts">Observed allele counts; iterated via GetTruncatedAlleleCounts().</param>
        /// <param name="gtModelCount">Genotype (copy numbers of haplotypes A and B) being evaluated.</param>
        /// <returns>
        /// The summed log-likelihood. Each per-locus likelihood is floored at 1.0 / Double.MaxValue before
        /// the logarithm is taken, so a zero-likelihood locus contributes a large negative finite value
        /// rather than negative infinity.
        /// </returns>
        public double GetGenotypeLogLikelihood(Balleles gtObservedCounts, PhasedGenotype gtModelCount)
        {
            // Floor applied in the linear (likelihood) domain. The previous code compared the
            // *log* of this value against a linear likelihood, which made the floor a no-op and
            // allowed Math.Log(0) = -Infinity to leak into the sum.
            double minLikelihood = 1.0 / Double.MaxValue;

            // Loop-invariant constants for the binomial error terms.
            double logErrorProb   = Math.Log(0.01);
            double logNoErrorProb = Math.Log(.99);

            // These depend only on the model genotype, not on the locus.
            int numHapsNonZero = (gtModelCount.CopyNumberA > 0 ? 1 : 0) + (gtModelCount.CopyNumberB > 0 ? 1 : 0);
            int totalCN        = gtModelCount.CopyNumberA + gtModelCount.CopyNumberB;
            // If both haplotypes have non-zero depth and the locus is non-ref, a locus has a prior prob of 1/3 of being hom,
            // assuming a well-mixed population.  We could adjust for observed het:hom, but we do not at this time.
            // Of course, if only one haplotype has non-zero depth, it must be hom.
            double priorFactorHom = numHapsNonZero == 2 ? 0.5 * (1.0 / 3.0) : 1.0;

            double currentLogLikelihood = 0;

            foreach (var gtCount in gtObservedCounts.GetTruncatedAlleleCounts())
            {
                int rowId = Math.Min(gtCount.Item1, _maxCoverage - 1);
                int colId = Math.Min(gtCount.Item2, _maxCoverage - 1);
                // limit total reads to _maxAlleleCounts before indexing the total-count distribution
                int    totalReads          = Math.Min(rowId + colId, _maxAlleleCounts);
                double likelihoodThisLocus = 0;

                // the observations can arise from a het locus, if both copy numbers are positive
                if (numHapsNonZero == 2)
                {
                    // Given a variant locus with two haplotypes, we have a roughly 2/3 chance of it being het.
                    // Alleles have 50:50 chance of being 'A' or 'B'.
                    // We ignore error terms, as they should have a negligible impact here.
                    var alleleDistribution = _alleleDistribution[gtModelCount.CopyNumberA][gtModelCount.CopyNumberB];
                    likelihoodThisLocus += 1.0 / 3.0 *
                                           (
                        alleleDistribution.Item1[rowId] * alleleDistribution.Item2[colId]
                        +
                        alleleDistribution.Item1[colId] * alleleDistribution.Item2[rowId]
                                           );
                }

                // they can also arise from a hom locus in various ways
                if (numHapsNonZero > 0)
                {
                    // Split the likelihood into two parts:
                    // First, the probability of getting the observed total number of reads, given the total copy number
                    double probTotalReadDepth = _totalAlleleCountsDistribution[totalCN][totalReads];
                    // Second, the probability of the observed per-allele read counts assuming one of the alleles is an error.
                    // The calculation here is simply binomial, in log space
                    double logCombinations     = LogCombinations(rowId, colId);
                    double logProbCountAErrors = logCombinations + rowId * logErrorProb + colId * logNoErrorProb;
                    double logProbCountBErrors = logCombinations + colId * logErrorProb + rowId * logNoErrorProb;

                    likelihoodThisLocus += priorFactorHom * probTotalReadDepth * (
                        Math.Exp(logProbCountAErrors) + Math.Exp(logProbCountBErrors));
                }
                else
                {
                    // Neither haplotype is present: use the total-count distribution for copy number 0.
                    likelihoodThisLocus = _totalAlleleCountsDistribution[0][totalReads];
                }

                // Floor the linear likelihood, then move to log space.
                likelihoodThisLocus   = Math.Max(minLikelihood, likelihoodThisLocus);
                currentLogLikelihood += Math.Log(likelihoodThisLocus);
            }
            return currentLogLikelihood;
        }
Exemple #8
0
 /// <summary>
 /// Returns the log-likelihood that <paramref name="copyNumberModel"/> assigns to
 /// <paramref name="gtStates"/> given the B-alleles of <paramref name="canvasSegment"/>.
 /// </summary>
 private static double GetCurrentGtLogLikelihood(ICopyNumberModel copyNumberModel, CanvasSegment canvasSegment, PhasedGenotype gtStates) =>
     copyNumberModel.GetGenotypeLogLikelihood(canvasSegment.Balleles, gtStates);
Exemple #9
0
 /// <summary>
 /// Builds a Genotype whose total copy number is the sum of the two haplotype copy numbers
 /// of <paramref name="phasedGenotype"/>.
 /// </summary>
 public static Genotype Create(PhasedGenotype phasedGenotype)
 {
     var totalCopyNumber = phasedGenotype.CopyNumberA + phasedGenotype.CopyNumberB;
     return new Genotype(totalCopyNumber, phasedGenotype);
 }
Exemple #10
0
 /// <summary>
 /// Stores the supplied total copy number and phased genotype on the new instance.
 /// </summary>
 private Genotype(int totalCopyNumber, PhasedGenotype phasedGenotype)
 {
     this.PhasedGenotype  = phasedGenotype;
     this.TotalCopyNumber = totalCopyNumber;
 }