/// <summary>
/// Scores how strongly the observed allele counts support the most likely candidate genotype and,
/// when the caller has not already chosen one, selects that genotype.
/// </summary>
/// <param name="gtObservedCounts">Observed allele counts handed to the copy number model.</param>
/// <param name="gtModelCounts">Candidate phased genotypes; positions in this list identify genotype states.</param>
/// <param name="selectedGtState">
/// Index of the chosen genotype in <paramref name="gtModelCounts"/>. When null on entry it is set to the
/// index of the first candidate with the highest log likelihood; an existing value is left untouched.
/// </param>
/// <param name="copyNumberModel">Model used to compute the per-genotype log likelihoods.</param>
/// <returns>
/// -10 * log10 of the normalized likelihood mass not assigned to the best candidate, capped at 60.
/// Returns 0 when the score is not a number (for example when no candidate has a finite log likelihood).
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Max()</c> when <paramref name="gtModelCounts"/> is empty.
/// </exception>
internal static double GetGtLogLikelihoodScore(Balleles gtObservedCounts, List<PhasedGenotype> gtModelCounts, ref int? selectedGtState, ICopyNumberModel copyNumberModel)
{
    const int maxGQscore = 60;

    // As we don't estimate allele CN but only MCC, focus on upper-triangle:
    // candidates with CopyNumberA < CopyNumberB are not evaluated and keep negative infinity.
    var gtLogLikelihoods = new List<double>(gtModelCounts.Count);
    foreach (var gtModelCount in gtModelCounts)
    {
        bool isLowerTriangle = gtModelCount.CopyNumberA < gtModelCount.CopyNumberB;
        gtLogLikelihoods.Add(isLowerTriangle
            ? Double.NegativeInfinity
            : copyNumberModel.GetGenotypeLogLikelihood(gtObservedCounts, gtModelCount));
    }

    var maxLogLikelihood = gtLogLikelihoods.Max();
    if (!selectedGtState.HasValue)
    {
        selectedGtState = gtLogLikelihoods.IndexOf(maxLogLikelihood);
    }

    // Shifting by the maximum means the best candidate contributes exp(0) = 1 to the sum,
    // so (sum - 1) / sum is the share of likelihood held by all other candidates.
    double normalizationConstant = gtLogLikelihoods.Sum(ll => Math.Exp(ll - maxLogLikelihood));
    double gqscore = -10.0 * Math.Log10((normalizationConstant - 1) / normalizationConstant);

    if (Double.IsNaN(gqscore))
    {
        return 0;
    }

    // An infinite score arises when the other candidates carry no likelihood at all; cap it like any large score.
    return Double.IsInfinity(gqscore) || gqscore > maxGQscore ? maxGQscore : gqscore;
}
/// <summary>
/// Merging the preceding segment into a segment must yield B-alleles in the order
/// "preceding segment first, then the segment's own".
/// </summary>
public void MergeIn_PreviousSegment_KeepsBAllelesOrdered()
{
    // Arrange: two adjacent chr1 segments sharing one empty bin list, each with a single B-allele.
    var noBins = new List<SampleGenomicBin>();
    var earlierBalleles = new Balleles(new List<Ballele> { new Ballele(1, 1, 1) });
    var earlierSegment = new CanvasSegment("chr1", 1, 2, noBins, earlierBalleles);
    var laterBalleles = new Balleles(new List<Ballele> { new Ballele(2, 1, 1) });
    var laterSegment = new CanvasSegment("chr1", 2, 3, noBins, laterBalleles);

    // Act
    laterSegment.MergeIn(earlierSegment);

    // Assert: both ranges are read after the merge, exactly as the comparison requires.
    var expectedRange = earlierBalleles.Range.Concat(laterBalleles.Range);
    var mergedRange = laterSegment.Balleles.Range;
    Assert.Equal(expectedRange, mergedRange);
}
/// <summary>
/// Checks which genotype state <c>VariantCaller.GetGtLogLikelihoodScore</c> selects among the
/// total-copy-number-3 genotypes for two different sets of observed allele counts.
/// </summary>
public void TestGetGtLogLikelihoodScore()
{
    var modelFactory = new HaplotypeCopyNumberModelFactory();
    var model = modelFactory.CreateModel(numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);

    int simulatedCn = 3;
    var candidateGenotypes = PedigreeInfo.GeneratePhasedGenotype(numCnStates: 5)
        .Where(gt => gt.TotalCopyNumber == simulatedCn)
        .Select(gt => gt.PhasedGenotype)
        .ToList();

    // Scenario 1: one allele count is near zero at every locus.
    // The variant caller only calls MCC, so only the upper triangle of CN genotypes is selected,
    // i.e. CNa=3,CNb=0 out of [CNa=3,CNb=0 ; CNa=0,CNb=3].
    var singleAlleleCounts = new Balleles(new List<Ballele>
    {
        new Ballele(1, 1, 73),
        new Ballele(100, 2, 74),
        new Ballele(200, 1, 76),
        new Ballele(300, 0, 74),
        new Ballele(400, 1, 75),
        new Ballele(500, 2, 74)
    });
    var expectedSingleAllele = new PhasedGenotype(3, 0);
    int? selectedGtState = null;
    VariantCaller.GetGtLogLikelihoodScore(singleAlleleCounts, candidateGenotypes, ref selectedGtState, model);
    Assert.Equal(candidateGenotypes.IndexOf(expectedSingleAllele), selectedGtState);

    // Scenario 2: both alleles are observed, one at roughly twice the count of the other.
    // Again only the upper triangle is selected, so the expected state is CNa=2,CNb=1.
    var unbalancedCounts = new Balleles(new List<Ballele>
    {
        new Ballele(1, 23, 53),
        new Ballele(100, 22, 54),
        new Ballele(200, 25, 46),
        new Ballele(300, 24, 50),
        new Ballele(400, 26, 51),
        new Ballele(500, 24, 51)
    });
    var expectedUnbalanced = new PhasedGenotype(2, 1);
    selectedGtState = null;
    VariantCaller.GetGtLogLikelihoodScore(unbalancedCounts, candidateGenotypes, ref selectedGtState, model);
    Assert.Equal(candidateGenotypes.IndexOf(expectedUnbalanced), selectedGtState);
}
/// <summary>
/// For the given allele counts, genotype (1,0) must be more likely than (2,0),
/// and (1,0) and (0,1) must receive exactly the same log likelihood.
/// </summary>
public void HaplotypeCopyNumberModelTester_HetLoss()
{
    var modelFactory = new HaplotypeCopyNumberModelFactory();
    var model = modelFactory.CreateModel(numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);

    var observedCounts = new Balleles(new List<Ballele>
    {
        new Ballele(1, 31, 1),
        new Ballele(100, 39, 2),
        new Ballele(200, 33, 3),
        new Ballele(300, 1, 33),
        new Ballele(400, 36, 2),
        new Ballele(500, 27, 1)
    });

    double oneCopyOnA = model.GetGenotypeLogLikelihood(observedCounts, new PhasedGenotype(1, 0));
    double twoCopiesOnA = model.GetGenotypeLogLikelihood(observedCounts, new PhasedGenotype(2, 0));
    double oneCopyOnB = model.GetGenotypeLogLikelihood(observedCounts, new PhasedGenotype(0, 1));

    // A single copy beats two copies on the same haplotype.
    Assert.True(oneCopyOnA > twoCopiesOnA);
    // Which haplotype carries the single copy must not matter.
    Assert.True(oneCopyOnA == oneCopyOnB);
}
/// <summary>
/// For the given allele counts, genotype (4,0) must be more likely than (3,1),
/// and (4,0) and (0,4) must receive exactly the same log likelihood.
/// </summary>
public void HaplotypeCopyNumberModelTester_Gain_CN4()
{
    var modelFactory = new HaplotypeCopyNumberModelFactory();
    var model = modelFactory.CreateModel(numCnStates: 5, maxCoverage: 300, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);

    var observedCounts = new Balleles(new List<Ballele>
    {
        new Ballele(1, 200, 1),
        new Ballele(100, 202, 2),
        new Ballele(200, 209, 3),
        new Ballele(300, 1, 198),
        new Ballele(400, 201, 2),
        new Ballele(500, 199, 1)
    });

    double fourCopiesOnA = model.GetGenotypeLogLikelihood(observedCounts, new PhasedGenotype(4, 0));
    double threePlusOne = model.GetGenotypeLogLikelihood(observedCounts, new PhasedGenotype(3, 1));
    double fourCopiesOnB = model.GetGenotypeLogLikelihood(observedCounts, new PhasedGenotype(0, 4));

    // All copies on one haplotype beats the 3+1 split.
    Assert.True(fourCopiesOnA > threePlusOne);
    // Which haplotype carries the four copies must not matter.
    Assert.True(fourCopiesOnA == fourCopiesOnB);
}
/// <summary>
/// Compares genotype (1,1) against the two copy-neutral LOH genotypes (0,2) and (2,0):
/// (1,1) must win when several loci show balanced counts, and lose when every locus is
/// dominated by a single allele.
/// </summary>
public void HaplotypeCopyNumberModelTester_PhasedGenotype_LossOfHeterozygosity()
{
    var modelFactory = new HaplotypeCopyNumberModelFactory();
    var model = modelFactory.CreateModel(numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);

    // Case 1: four of the six loci have near-equal counts for both alleles.
    var mostlyBalancedCounts = new Balleles(new List<Ballele>
    {
        new Ballele(1, 50, 1),
        new Ballele(100, 25, 24),
        new Ballele(200, 23, 27),
        new Ballele(300, 25, 24),
        new Ballele(400, 1, 50),
        new Ballele(500, 25, 25)
    });
    double diploidHet = model.GetGenotypeLogLikelihood(mostlyBalancedCounts, new PhasedGenotype(1, 1));
    double lohB = model.GetGenotypeLogLikelihood(mostlyBalancedCounts, new PhasedGenotype(0, 2));
    double lohA = model.GetGenotypeLogLikelihood(mostlyBalancedCounts, new PhasedGenotype(2, 0));
    Assert.True(diploidHet > lohB);
    Assert.True(diploidHet > lohA);

    // Case 2: every locus is dominated by one allele, with at most two reads on the other.
    var singleAlleleCountsWithNoise = new Balleles(new List<Ballele>
    {
        new Ballele(1, 53, 1),
        new Ballele(100, 50, 1),
        new Ballele(200, 47, 2),
        new Ballele(300, 46, 0),
        new Ballele(400, 48, 2),
        new Ballele(500, 53, 0)
    });
    diploidHet = model.GetGenotypeLogLikelihood(singleAlleleCountsWithNoise, new PhasedGenotype(1, 1));
    lohB = model.GetGenotypeLogLikelihood(singleAlleleCountsWithNoise, new PhasedGenotype(0, 2));
    lohA = model.GetGenotypeLogLikelihood(singleAlleleCountsWithNoise, new PhasedGenotype(2, 0));
    Assert.True(diploidHet < lohB);
    Assert.True(diploidHet < lohA);
}
/// <summary>
/// Computes the log likelihood of the observed allele counts under the given phased genotype
/// by summing a per-locus log likelihood over all truncated allele counts.
/// </summary>
/// <param name="gtObservedCounts">Observed allele counts; each locus supplies a pair of counts.</param>
/// <param name="gtModelCount">Phased genotype (copy numbers A and B) to evaluate.</param>
/// <returns>
/// Sum of the per-locus log likelihoods, where each locus is floored at log(1 / Double.MaxValue)
/// so a zero likelihood cannot drive the sum to negative infinity. Returns 0 when there are no loci.
/// </returns>
public double GetGenotypeLogLikelihood(Balleles gtObservedCounts, PhasedGenotype gtModelCount)
{
    // Floor for a single locus, expressed in log space. It must be compared against the LOG of the
    // locus likelihood: compared against the raw likelihood (always >= 0) it would never apply.
    double minLogLikelihood = Math.Log(1.0 / Double.MaxValue);

    // Everything below depends only on the genotype, not on the locus, so compute it once.
    double logErrorProb = Math.Log(0.01);
    double logNoErrorProb = Math.Log(.99);
    int copyNumberA = gtModelCount.CopyNumberA;
    int copyNumberB = gtModelCount.CopyNumberB;
    int numHapsNonZero = (copyNumberA > 0 ? 1 : 0) + (copyNumberB > 0 ? 1 : 0);
    int totalCN = copyNumberA + copyNumberB;
    // If both haplotypes have non-zero depth and the locus is non-ref, a locus has a prior prob of 1/3 of being hom,
    // assuming a well-mixed population. We could adjust for observed het:hom, but we do not at this time.
    // Of course, if only one haplotype has non-zero depth, it must be hom.
    double priorFactorHom = numHapsNonZero == 2 ? 0.5 * (1.0 / 3.0) : 1.0;

    double currentLogLikelihood = 0;
    foreach (var gtCount in gtObservedCounts.GetTruncatedAlleleCounts())
    {
        int rowId = Math.Min(gtCount.Item1, _maxCoverage - 1);
        int colId = Math.Min(gtCount.Item2, _maxCoverage - 1);
        // limit total reads to _maxAlleleCounts before indexing the total-count distributions
        int totalReads = Math.Min(rowId + colId, _maxAlleleCounts);

        double likelihoodThisLocus = 0;
        if (numHapsNonZero == 0)
        {
            // Total copy number 0: use the CN=0 total-count distribution
            // (per the original note, alleleStateZeroCorrector enables non-zero likelihoods here).
            likelihoodThisLocus = _totalAlleleCountsDistribution[0][totalReads];
        }
        else
        {
            // the observations can arise from a het locus, if both copy numbers are positive
            if (numHapsNonZero == 2)
            {
                // Given a variant locus with two haplotypes, we have a roughly 2/3 chance of it being het.
                // Alleles have 50:50 chance of being 'A' or 'B', which gives the factor 1/3 per assignment.
                // We ignore error terms, as they should have a negligible impact here.
                var alleleDistribution = _alleleDistribution[copyNumberA][copyNumberB];
                likelihoodThisLocus += 1.0 / 3.0 * (
                    alleleDistribution.Item1[rowId] * alleleDistribution.Item2[colId] +
                    alleleDistribution.Item1[colId] * alleleDistribution.Item2[rowId]);
            }

            // they can also arise from a hom locus in various ways.
            // Split the likelihood into two parts:
            // First, the probability of getting the observed total number of reads, given the total copy number
            double probTotalReadDepth = _totalAlleleCountsDistribution[totalCN][totalReads];
            // Second, the probability of the observed per-allele read counts assuming one of the alleles is an error.
            // The calculation here is simply binomial, in log space
            double logCombinations = LogCombinations(rowId, colId);
            double logProbCountAErrors = logCombinations + rowId * logErrorProb + colId * logNoErrorProb;
            double logProbCountBErrors = logCombinations + colId * logErrorProb + rowId * logNoErrorProb;
            likelihoodThisLocus += priorFactorHom * probTotalReadDepth *
                                   (Math.Exp(logProbCountAErrors) + Math.Exp(logProbCountBErrors));
        }

        // Math.Log(0) is negative infinity; the floor keeps one impossible locus from erasing all other evidence.
        currentLogLikelihood += Math.Max(minLogLikelihood, Math.Log(likelihoodThisLocus));
    }
    return currentLogLikelihood;
}
/// <summary>
/// A proband segment with copy number 0 whose parents both have copy number 2 must not be
/// classified as a shared CNV, whether the decision is made from copy numbers alone or from
/// explicit pedigree genotypes.
/// </summary>
public void TestCommonCnvAssignment_DeNovoVariants()
{
    var parent1Id = new SampleId("parent1");
    var parent2Id = new SampleId("parent2");
    var probandId = new SampleId("proband");

    // Both parents: three bins with count 100, one B-allele (30, 30), copy number 2.
    var parent1Bins = new List<SampleGenomicBin>
    {
        new SampleGenomicBin("chr1", 1, 2, 100),
        new SampleGenomicBin("chr1", 1, 2, 100),
        new SampleGenomicBin("chr1", 1, 2, 100)
    };
    var parent1Balleles = new Balleles(new List<Ballele> { new Ballele(5501, 30, 30) });
    var parent1Segment = new CanvasSegment("chr1", 1, 2, parent1Bins, parent1Balleles) { CopyNumber = 2 };

    var parent2Bins = new List<SampleGenomicBin>
    {
        new SampleGenomicBin("chr1", 1, 2, 100),
        new SampleGenomicBin("chr1", 1, 2, 100),
        new SampleGenomicBin("chr1", 1, 2, 100)
    };
    var parent2Balleles = new Balleles(new List<Ballele> { new Ballele(5501, 30, 30) });
    var parent2Segment = new CanvasSegment("chr1", 1, 2, parent2Bins, parent2Balleles) { CopyNumber = 2 };

    // Proband: three bins with count 0, one B-allele (0, 0), copy number 0.
    var probandBins = new List<SampleGenomicBin>
    {
        new SampleGenomicBin("chr1", 1, 2, 0),
        new SampleGenomicBin("chr1", 1, 2, 0),
        new SampleGenomicBin("chr1", 1, 2, 0)
    };
    var probandBalleles = new Balleles(new List<Ballele> { new Ballele(5501, 0, 0) });
    var probandSegment = new CanvasSegment("chr1", 1, 2, probandBins, probandBalleles) { CopyNumber = 0 };

    var segmentsBySample = new SampleMap<CanvasSegment>
    {
        { parent1Id, parent1Segment },
        { parent2Id, parent2Segment },
        { probandId, probandSegment }
    };

    var parent1Metrics = SampleMetrics.GetSampleInfo(new List<CanvasSegment> { parent1Segment },
        ploidyBedPath: null, numberOfTrimmedBins: 2, id: parent1Id);
    var parent2Metrics = SampleMetrics.GetSampleInfo(new List<CanvasSegment> { parent2Segment },
        ploidyBedPath: null, numberOfTrimmedBins: 2, id: parent2Id);
    var probandMetrics = SampleMetrics.GetSampleInfo(new List<CanvasSegment> { probandSegment },
        ploidyBedPath: null, numberOfTrimmedBins: 2, id: probandId);
    var metricsBySample = new SampleMap<SampleMetrics>
    {
        { parent1Id, parent1Metrics },
        { parent2Id, parent2Metrics },
        { probandId, probandMetrics }
    };

    // 1) Decision based on segments and metrics only.
    bool sharedWithoutGenotypes = global::CanvasPedigreeCaller.CanvasPedigreeCaller.IsSharedCnv(
        segmentsBySample, metricsBySample,
        new List<SampleId> { parent1Id, parent2Id }, probandId, maximumCopyNumber: 5);
    Assert.False(sharedWithoutGenotypes);

    // 2) Genotypes supplied: both parents (1,1), proband (0,1).
    var genotypesDiploidParents = new SampleMap<Genotype>
    {
        { parent1Id, Genotype.Create(new PhasedGenotype(1, 1)) },
        { parent2Id, Genotype.Create(new PhasedGenotype(1, 1)) },
        { probandId, Genotype.Create(new PhasedGenotype(0, 1)) }
    };
    bool sharedWithDiploidParents = global::CanvasPedigreeCaller.CanvasPedigreeCaller.IsSharedCnv(
        genotypesDiploidParents, segmentsBySample, metricsBySample,
        new List<SampleId> { parent1Id, parent2Id }, probandId, maximumCopyNumber: 5);
    Assert.False(sharedWithDiploidParents);

    // 3) Genotypes supplied: parent1 (2,1), parent2 (1,1), proband (0,1).
    var genotypesParent1Gain = new SampleMap<Genotype>
    {
        { parent1Id, Genotype.Create(new PhasedGenotype(2, 1)) },
        { parent2Id, Genotype.Create(new PhasedGenotype(1, 1)) },
        { probandId, Genotype.Create(new PhasedGenotype(0, 1)) }
    };
    bool sharedWithParent1Gain = global::CanvasPedigreeCaller.CanvasPedigreeCaller.IsSharedCnv(
        genotypesParent1Gain, segmentsBySample, metricsBySample,
        new List<SampleId> { parent1Id, parent2Id }, probandId, maximumCopyNumber: 5);
    Assert.False(sharedWithParent1Gain);
}