Esempio n. 1
0
        /// <summary>
        /// Scores how strongly the best-fitting candidate genotype stands out from the other candidates
        /// for the observed allele counts.
        /// </summary>
        /// <remarks>
        /// Only candidates whose CopyNumberA is at least CopyNumberB are evaluated with
        /// <paramref name="copyNumberModel"/>; every other candidate keeps a log-likelihood of negative infinity.
        /// With Z = sum over candidates of exp(logLikelihood - maxLogLikelihood), the score is
        /// -10 * log10((Z - 1) / Z), capped at 60.
        /// </remarks>
        /// <param name="gtObservedCounts">Observed allele counts, passed through to the model unchanged.</param>
        /// <param name="gtModelCounts">Candidate genotypes; <paramref name="selectedGtState"/> indexes into this list.</param>
        /// <param name="selectedGtState">
        /// Left untouched when it already holds a value; otherwise set to the index of the first candidate
        /// whose log-likelihood equals the maximum.
        /// </param>
        /// <param name="copyNumberModel">Model that supplies the per-genotype log-likelihoods.</param>
        /// <returns>The score, capped at 60; 0 when the computed score is NaN.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown by <c>Max()</c> when <paramref name="gtModelCounts"/> is empty.
        /// </exception>
        internal static double GetGtLogLikelihoodScore(Balleles gtObservedCounts, List<PhasedGenotype> gtModelCounts, ref int? selectedGtState, ICopyNumberModel copyNumberModel)
        {
            const int maxGQscore = 60;
            var gtLogLikelihoods = Enumerable.Repeat(Double.NegativeInfinity, gtModelCounts.Count).ToList();

            for (int gtIndex = 0; gtIndex < gtModelCounts.Count; gtIndex++)
            {
                var gtModelCount = gtModelCounts[gtIndex];
                // As we don't estimate allele CN but only MCC, focus on upper-triangle
                if (gtModelCount.CopyNumberA < gtModelCount.CopyNumberB)
                {
                    continue;
                }
                gtLogLikelihoods[gtIndex] = copyNumberModel.GetGenotypeLogLikelihood(gtObservedCounts, gtModelCount);
            }

            var maxLogLikelihood = gtLogLikelihoods.Max();
            if (!selectedGtState.HasValue)
            {
                selectedGtState = gtLogLikelihoods.IndexOf(maxLogLikelihood);
            }

            // Likelihoods are rescaled by the maximum before exponentiation so the best state contributes exactly 1.
            double normalizationConstant = gtLogLikelihoods.Sum(ll => Math.Exp(ll - maxLogLikelihood));
            double gqscore = -10.0 * Math.Log10((normalizationConstant - 1) / normalizationConstant);

            // NaN (e.g. every candidate at negative infinity) carries no confidence at all.
            if (Double.IsNaN(gqscore))
            {
                return 0;
            }
            // An infinite score arises when the best state is the only one with non-zero weight; cap it.
            if (Double.IsInfinity(gqscore) || gqscore > maxGQscore)
            {
                return maxGQscore;
            }
            return gqscore;
        }
Esempio n. 2
0
        public void MergeIn_PreviousSegment_KeepsBAllelesOrdered()
        {
            // Two adjacent chr1 segments, each carrying a single B-allele; the earlier segment is merged
            // into the later one and the merged range must list the earlier segment's alleles first.
            var noBins = new List<SampleGenomicBin>();

            var earlierAlleles = new Balleles(new List<Ballele> { new Ballele(1, 1, 1) });
            var earlierSegment = new CanvasSegment("chr1", 1, 2, noBins, earlierAlleles);

            var laterAlleles = new Balleles(new List<Ballele> { new Ballele(2, 1, 1) });
            var laterSegment = new CanvasSegment("chr1", 2, 3, noBins, laterAlleles);

            laterSegment.MergeIn(earlierSegment);

            var expectedRange = earlierAlleles.Range.Concat(laterAlleles.Range);
            Assert.Equal(expectedRange, laterSegment.Balleles.Range);
        }
Esempio n. 3
0
        public void TestGetGtLogLikelihoodScore()
        {
            // Candidate genotypes are every phased genotype whose total copy number is 3.
            var model = new HaplotypeCopyNumberModelFactory().CreateModel(
                numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);
            int simulatedCn = 3;
            var candidateGenotypes = PedigreeInfo.GeneratePhasedGenotype(numCnStates: 5)
                .Where(gt => gt.TotalCopyNumber == simulatedCn)
                .Select(gt => gt.PhasedGenotype)
                .ToList();

            // Case 1: one allele carries almost all the reads at every locus.
            var singleAlleleCounts = new Balleles(new List<Ballele>
            {
                new Ballele(1, 1, 73),
                new Ballele(100, 2, 74),
                new Ballele(200, 1, 76),
                new Ballele(300, 0, 74),
                new Ballele(400, 1, 75),
                new Ballele(500, 2, 74)
            });
            var expectedGt3_0 = new PhasedGenotype(3, 0);
            int? selectedGtState = null;

            // The variant caller only calls MCC, so only the upper triangle of CN genotypes is scored,
            // i.e. CNa=3,CNb=0 is picked out of [CNa=3,CNb=0 ; CNa=0,CNb=3].
            VariantCaller.GetGtLogLikelihoodScore(singleAlleleCounts, candidateGenotypes, ref selectedGtState, model);

            Assert.Equal(candidateGenotypes.IndexOf(expectedGt3_0), selectedGtState);

            // Case 2: reads split roughly 1:2 between the two alleles.
            var unevenSplitCounts = new Balleles(new List<Ballele>
            {
                new Ballele(1, 23, 53),
                new Ballele(100, 22, 54),
                new Ballele(200, 25, 46),
                new Ballele(300, 24, 50),
                new Ballele(400, 26, 51),
                new Ballele(500, 24, 51)
            });
            var expectedGt2_1 = new PhasedGenotype(2, 1);
            selectedGtState = null;

            // Same upper-triangle restriction: CNa=2,CNb=1 is picked out of [CNa=2,CNb=1 ; CNa=1,CNb=2].
            VariantCaller.GetGtLogLikelihoodScore(unevenSplitCounts, candidateGenotypes, ref selectedGtState, model);

            Assert.Equal(candidateGenotypes.IndexOf(expectedGt2_1), selectedGtState);
        }
Esempio n. 4
0
        public void HaplotypeCopyNumberModelTester_HetLoss()
        {
            // Roughly 30-40 reads on one allele and 1-3 on the other at every locus:
            // a single remaining copy should fit better than two copies of the same haplotype,
            // and which haplotype is the one that remains must not matter.
            var model = new HaplotypeCopyNumberModelFactory().CreateModel(
                numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);
            var observed = new Balleles(new List<Ballele>
            {
                new Ballele(1, 31, 1),
                new Ballele(100, 39, 2),
                new Ballele(200, 33, 3),
                new Ballele(300, 1, 33),
                new Ballele(400, 36, 2),
                new Ballele(500, 27, 1)
            });

            double oneCopyOnA = model.GetGenotypeLogLikelihood(observed, new PhasedGenotype(1, 0));
            double twoCopiesOnA = model.GetGenotypeLogLikelihood(observed, new PhasedGenotype(2, 0));
            double oneCopyOnB = model.GetGenotypeLogLikelihood(observed, new PhasedGenotype(0, 1));

            Assert.True(twoCopiesOnA < oneCopyOnA);
            Assert.True(oneCopyOnB == oneCopyOnA);
        }
Esempio n. 5
0
        public void HaplotypeCopyNumberModelTester_Gain_CN4()
        {
            // About 200 reads on one allele and 1-3 on the other at every locus:
            // four copies of a single haplotype should fit better than a 3+1 split,
            // and the 4+0 and 0+4 genotypes must score identically.
            var model = new HaplotypeCopyNumberModelFactory().CreateModel(
                numCnStates: 5, maxCoverage: 300, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);
            var observed = new Balleles(new List<Ballele>
            {
                new Ballele(1, 200, 1),
                new Ballele(100, 202, 2),
                new Ballele(200, 209, 3),
                new Ballele(300, 1, 198),
                new Ballele(400, 201, 2),
                new Ballele(500, 199, 1)
            });

            double fourOnA = model.GetGenotypeLogLikelihood(observed, new PhasedGenotype(4, 0));
            double threePlusOne = model.GetGenotypeLogLikelihood(observed, new PhasedGenotype(3, 1));
            double fourOnB = model.GetGenotypeLogLikelihood(observed, new PhasedGenotype(0, 4));

            Assert.True(threePlusOne < fourOnA);
            Assert.True(fourOnB == fourOnA);
        }
Esempio n. 6
0
        public void HaplotypeCopyNumberModelTester_PhasedGenotype_LossOfHeterozygosity()
        {
            var model = new HaplotypeCopyNumberModelFactory().CreateModel(
                numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);
            var genotypeHet = new PhasedGenotype(1, 1);
            var genotypeLohB = new PhasedGenotype(0, 2);
            var genotypeLohA = new PhasedGenotype(2, 0);

            // Scenario 1: most loci show balanced counts, so the 1+1 genotype must beat both LOH genotypes.
            var mostlyBalanced = new Balleles(new List<Ballele>
            {
                new Ballele(1, 50, 1),
                new Ballele(100, 25, 24),
                new Ballele(200, 23, 27),
                new Ballele(300, 25, 24),
                new Ballele(400, 1, 50),
                new Ballele(500, 25, 25)
            });
            double hetBalanced = model.GetGenotypeLogLikelihood(mostlyBalanced, genotypeHet);
            double lohBBalanced = model.GetGenotypeLogLikelihood(mostlyBalanced, genotypeLohB);
            double lohABalanced = model.GetGenotypeLogLikelihood(mostlyBalanced, genotypeLohA);

            Assert.True(lohBBalanced < hetBalanced);
            Assert.True(lohABalanced < hetBalanced);

            // Scenario 2: every locus has ~50 reads on one allele and at most 2 on the other,
            // so both LOH genotypes must beat the 1+1 genotype.
            var bAllelesLohWithNoise = new Balleles(new List<Ballele>
            {
                new Ballele(1, 53, 1),
                new Ballele(100, 50, 1),
                new Ballele(200, 47, 2),
                new Ballele(300, 46, 0),
                new Ballele(400, 48, 2),
                new Ballele(500, 53, 0)
            });
            double hetNoisy = model.GetGenotypeLogLikelihood(bAllelesLohWithNoise, genotypeHet);
            double lohBNoisy = model.GetGenotypeLogLikelihood(bAllelesLohWithNoise, genotypeLohB);
            double lohANoisy = model.GetGenotypeLogLikelihood(bAllelesLohWithNoise, genotypeLohA);

            Assert.True(lohBNoisy > hetNoisy);
            Assert.True(lohANoisy > hetNoisy);
        }
        /// <summary>
        /// Computes the log-likelihood of the observed allele counts under the given phased genotype,
        /// summed over every locus returned by <c>GetTruncatedAlleleCounts()</c>.
        /// </summary>
        /// <remarks>
        /// Per locus the likelihood is built from:
        /// a heterozygous term (only when both copy numbers are positive), plus a homozygous-with-read-errors
        /// term (when at least one copy number is positive). When both copy numbers are zero, the total-count
        /// distribution for copy number 0 is used instead.
        /// Each locus' log-likelihood is floored at log(1 / Double.MaxValue), so a zero likelihood at a single
        /// locus cannot drive the whole sum to negative infinity.
        /// </remarks>
        /// <param name="gtObservedCounts">Observed per-locus allele counts.</param>
        /// <param name="gtModelCount">Phased genotype (copy numbers of haplotypes A and B) to evaluate.</param>
        /// <returns>The summed, per-locus floored log-likelihood; 0 when there are no loci.</returns>
        public double GetGenotypeLogLikelihood(Balleles gtObservedCounts, PhasedGenotype gtModelCount)
        {
            // Floor for a single locus, expressed in log space.
            double minLogLikelihood = Math.Log(1.0 / Double.MaxValue);
            // Per-read error model used by the homozygous term; computed once instead of once per locus.
            double logErrorProb = Math.Log(0.01);
            double logNoErrorProb = Math.Log(.99);

            // Everything derived from the genotype alone is loop-invariant.
            var copyNumberA = gtModelCount.CopyNumberA;
            var copyNumberB = gtModelCount.CopyNumberB;
            int numHapsNonZero = (copyNumberA > 0 ? 1 : 0) + (copyNumberB > 0 ? 1 : 0);
            int totalCN = copyNumberA + copyNumberB;
            // If both haplotypes have non-zero depth and the locus is non-ref, a locus has a prior prob of 1/3 of being hom,
            // assuming a well-mixed population.  We could adjust for observed het:hom, but we do not at this time.
            // Of course, if only one haplotype has non-zero depth, it must be hom.
            double priorFactorHom = numHapsNonZero == 2 ? 0.5 * (1.0 / 3.0) : 1.0;

            double currentLogLikelihood = 0;

            foreach (var gtCount in gtObservedCounts.GetTruncatedAlleleCounts())
            {
                int rowId = Math.Min(gtCount.Item1, _maxCoverage - 1);
                int colId = Math.Min(gtCount.Item2, _maxCoverage - 1);
                // limit total reads to _maxAlleleCounts before indexing the total-count distributions
                int totalReads = Math.Min(rowId + colId, _maxAlleleCounts);
                double likelihoodThisLocus = 0;

                // the observations can arise from a het locus, if both copy numbers are positive
                if (numHapsNonZero == 2)
                {
                    // Given a variant locus with two haplotypes, we have a roughly 2/3 chance of it being het.
                    // Alleles have 50:50 chance of being 'A' or 'B'.
                    // We ignore error terms, as they should have a negligible impact here.
                    var alleleDistribution = _alleleDistribution[copyNumberA][copyNumberB];
                    likelihoodThisLocus += 1.0 / 3.0 *
                                           (
                        alleleDistribution.Item1[rowId] * alleleDistribution.Item2[colId]
                        +
                        alleleDistribution.Item1[colId] * alleleDistribution.Item2[rowId]
                                           );
                }

                // they can also arise from a hom locus in various ways
                if (numHapsNonZero > 0)
                {
                    // Split the likelihood into two parts:
                    // First, the probability of getting the observed total number of reads, given the total copy number
                    double probTotalReadDepth = _totalAlleleCountsDistribution[totalCN][totalReads];
                    // Second, the probability of the observed per-allele read counts assuming one of the alleles is an error.
                    // The calculation here is simply binomial, in log space
                    double logCombinations = LogCombinations(rowId, colId);
                    double logProbCountAErrors = logCombinations + rowId * logErrorProb + colId * logNoErrorProb;
                    double logProbCountBErrors = logCombinations + colId * logErrorProb + rowId * logNoErrorProb;

                    likelihoodThisLocus += priorFactorHom * probTotalReadDepth * (
                        Math.Exp(logProbCountAErrors) + Math.Exp(logProbCountBErrors));
                }
                else
                {
                    // no haplotype present: fall back to the total-count distribution for copy number 0
                    likelihoodThisLocus = _totalAlleleCountsDistribution[0][totalReads];
                }

                // Apply the floor to the LOG of the likelihood. Comparing the log-space floor with the
                // linear-space likelihood (as was done previously) never clamps anything, because a
                // likelihood is non-negative while the floor is about -709.8.
                currentLogLikelihood += Math.Max(minLogLikelihood, Math.Log(likelihoodThisLocus));
            }
            return currentLogLikelihood;
        }
Esempio n. 8
0
        public void TestCommonCnvAssignment_DeNovoVariants()
        {
            // Trio setup: both parents have copy number 2 with bins at coverage 100,
            // while the proband has copy number 0 with bins at coverage 0.
            var parent1Id = new SampleId("parent1");
            var parent2Id = new SampleId("parent2");
            var probandId = new SampleId("proband");

            var parent1Bins = new List<SampleGenomicBin>
            {
                new SampleGenomicBin("chr1", 1, 2, 100),
                new SampleGenomicBin("chr1", 1, 2, 100),
                new SampleGenomicBin("chr1", 1, 2, 100)
            };
            var parent1Alleles = new Balleles(new List<Ballele> { new Ballele(5501, 30, 30) });
            var segmentParent1 = new CanvasSegment("chr1", 1, 2, parent1Bins, parent1Alleles) { CopyNumber = 2 };

            var parent2Bins = new List<SampleGenomicBin>
            {
                new SampleGenomicBin("chr1", 1, 2, 100),
                new SampleGenomicBin("chr1", 1, 2, 100),
                new SampleGenomicBin("chr1", 1, 2, 100)
            };
            var parent2Alleles = new Balleles(new List<Ballele> { new Ballele(5501, 30, 30) });
            var segmentParent2 = new CanvasSegment("chr1", 1, 2, parent2Bins, parent2Alleles) { CopyNumber = 2 };

            var probandBins = new List<SampleGenomicBin>
            {
                new SampleGenomicBin("chr1", 1, 2, 0),
                new SampleGenomicBin("chr1", 1, 2, 0),
                new SampleGenomicBin("chr1", 1, 2, 0)
            };
            var probandAlleles = new Balleles(new List<Ballele> { new Ballele(5501, 0, 0) });
            var segmentProband = new CanvasSegment("chr1", 1, 2, probandBins, probandAlleles) { CopyNumber = 0 };

            var pedigreeSegments = new SampleMap<CanvasSegment>
            {
                { parent1Id, segmentParent1 },
                { parent2Id, segmentParent2 },
                { probandId, segmentProband }
            };

            // Per-sample metrics, each derived from that sample's single segment.
            var sampleMetrics = new SampleMap<SampleMetrics>
            {
                {
                    parent1Id,
                    SampleMetrics.GetSampleInfo(new List<CanvasSegment> { segmentParent1 },
                        ploidyBedPath: null, numberOfTrimmedBins: 2, id: parent1Id)
                },
                {
                    parent2Id,
                    SampleMetrics.GetSampleInfo(new List<CanvasSegment> { segmentParent2 },
                        ploidyBedPath: null, numberOfTrimmedBins: 2, id: parent2Id)
                },
                {
                    probandId,
                    SampleMetrics.GetSampleInfo(new List<CanvasSegment> { segmentProband },
                        ploidyBedPath: null, numberOfTrimmedBins: 2, id: probandId)
                }
            };

            // 1) Segment-only overload: the proband's variant must not be reported as shared.
            bool sharedFromSegments = global::CanvasPedigreeCaller.CanvasPedigreeCaller.IsSharedCnv(
                pedigreeSegments, sampleMetrics,
                new List<SampleId> { parent1Id, parent2Id },
                probandId, maximumCopyNumber: 5);

            Assert.False(sharedFromSegments);

            // 2) Genotype-aware overload with both parents at (1, 1) and the proband at (0, 1).
            var genotypesBothParentsDiploid = new SampleMap<Genotype>
            {
                { parent1Id, Genotype.Create(new PhasedGenotype(1, 1)) },
                { parent2Id, Genotype.Create(new PhasedGenotype(1, 1)) },
                { probandId, Genotype.Create(new PhasedGenotype(0, 1)) }
            };
            bool sharedWithDiploidParents = global::CanvasPedigreeCaller.CanvasPedigreeCaller.IsSharedCnv(
                genotypesBothParentsDiploid, pedigreeSegments, sampleMetrics,
                new List<SampleId> { parent1Id, parent2Id },
                probandId, maximumCopyNumber: 5);

            Assert.False(sharedWithDiploidParents);

            // 3) Same call with parent1 at (2, 1) instead; the result must still be "not shared".
            var genotypesParent1Gain = new SampleMap<Genotype>
            {
                { parent1Id, Genotype.Create(new PhasedGenotype(2, 1)) },
                { parent2Id, Genotype.Create(new PhasedGenotype(1, 1)) },
                { probandId, Genotype.Create(new PhasedGenotype(0, 1)) }
            };
            bool sharedWithParent1Gain = global::CanvasPedigreeCaller.CanvasPedigreeCaller.IsSharedCnv(
                genotypesParent1Gain, pedigreeSegments, sampleMetrics,
                new List<SampleId> { parent1Id, parent2Id },
                probandId, maximumCopyNumber: 5);

            Assert.False(sharedWithParent1Gain);
        }