/// <summary>
/// Writes MajorChromosomeCount and MajorChromosomeCountScore on a segment for the supplied genotype.
/// For copy numbers above diploid the count is the larger of the two haplotype copy numbers and the
/// score comes from GetGtLogLikelihoodScore; otherwise the score is cleared and the count is either
/// cleared (diploid) or set to the copy number itself (below diploid).
/// </summary>
/// <param name="canvasSegment">Segment whose MCC fields are overwritten.</param>
/// <param name="copyNumberModel">Model forwarded to GetGtLogLikelihoodScore.</param>
/// <param name="gtStates">Genotype chosen for the segment.</param>
/// <param name="copyNumber">Copy number of the segment; used to index _genotypes.</param>
private void AssignMcc(CanvasSegment canvasSegment, ICopyNumberModel copyNumberModel, PhasedGenotype gtStates, int copyNumber)
{
    const int diploidCopyNumber = 2;

    if (copyNumber <= diploidCopyNumber)
    {
        // variant caller does not attempt to call LOH, for DELs CN=MCC
        canvasSegment.MajorChromosomeCount = copyNumber == diploidCopyNumber ? (int?)null : copyNumber;
        canvasSegment.MajorChromosomeCountScore = null;
        return;
    }

    canvasSegment.MajorChromosomeCount = Math.Max(gtStates.CopyNumberA, gtStates.CopyNumberB);

    // Index of the chosen genotype among all genotypes with this copy number; passed by reference to the scorer.
    var candidateGenotypes = _genotypes[copyNumber];
    int? selectedGtState = candidateGenotypes.IndexOf(gtStates);
    canvasSegment.MajorChromosomeCountScore = GetGtLogLikelihoodScore(
        canvasSegment.Balleles, candidateGenotypes, ref selectedGtState, copyNumberModel);
}
/// <summary>
/// Tests whether a child genotype shares at least one haplotype copy number with a parent genotype.
/// </summary>
/// <param name="parentGtStates">Genotype of the parent.</param>
/// <param name="childGtStates">Genotype of the child.</param>
/// <returns>
/// True when CopyNumberA or CopyNumberB of the child equals CopyNumberA or CopyNumberB of the parent.
/// </returns>
private bool IsGtPedigreeConsistent(PhasedGenotype parentGtStates, PhasedGenotype childGtStates)
{
    var parentA = parentGtStates.CopyNumberA;
    var childA = childGtStates.CopyNumberA;
    var parentB = parentGtStates.CopyNumberB;
    var childB = childGtStates.CopyNumberB;

    bool childAMatchesParent = parentA == childA || parentB == childA;
    bool childBMatchesParent = parentA == childB || parentB == childB;
    return childAMatchesParent || childBMatchesParent;
}
private static ISampleMap <Dictionary <PhasedGenotype, double> > GetGenotypeLogLikelihoods(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <ICopyNumberModel> copyNumberModel, List <PhasedGenotype> genotypes) { var REF = new PhasedGenotype(1, 1); var loh = new List <PhasedGenotype> { new PhasedGenotype(0, 2), new PhasedGenotype(2, 0) }; var singleSampleLikelihoods = new SampleMap <Dictionary <PhasedGenotype, double> >(); foreach (var sampleId in canvasSegments.SampleIds) { var logLikelihoods = genotypes.Select(genotype => (genotype, copyNumberModel[sampleId].
/// <summary>
/// Searches every pair of parental genotypes available for the parents' copy number calls for the pair
/// with the highest joint log likelihood (parents plus best pedigree-consistent offspring genotypes).
/// Updates MajorChromosomeCount on parents and offspring, via AssignMcc, each time a better pair is found.
/// </summary>
/// <param name="canvasSegments">Per-sample segments; read for copy numbers and updated in place.</param>
/// <param name="model">Per-sample copy number models used to evaluate genotype likelihoods.</param>
/// <param name="pedigreeInfo">Supplies the parent ids (first and last of ParentsIds) and the offspring ids.</param>
private void AssignMccWithPedigreeInfo(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<ICopyNumberModel> model, PedigreeInfo pedigreeInfo)
{
    var parent1Id = pedigreeInfo.ParentsIds.First();
    var parent2Id = pedigreeInfo.ParentsIds.Last();
    var parent1Segment = canvasSegments[parent1Id];
    var parent2Segment = canvasSegments[parent2Id];
    int parent1CopyNumber = parent1Segment.CopyNumber;
    int parent2CopyNumber = parent2Segment.CopyNumber;

    double maximalLogLikelihood = Double.NegativeInfinity;

    foreach (var parent1GtStates in _genotypes[parent1CopyNumber])
    {
        foreach (var parent2GtStates in _genotypes[parent2CopyNumber])
        {
            // Best genotype per offspring for this parental pair, in OffspringIds order (null when none qualified).
            var bestChildGtStates = new List<PhasedGenotype>();
            double currentLogLikelihood = 0;

            foreach (SampleId child in pedigreeInfo.OffspringIds)
            {
                var offspringSegment = canvasSegments[child];
                int childCopyNumber = offspringSegment.CopyNumber;
                bool isInheritedCnv = !offspringSegment.DqScore.HasValue;

                PhasedGenotype bestGtState = null;
                double bestLogLikelihood = GetProbandLogLikelihood(
                    model[child], childCopyNumber, parent1GtStates, parent2GtStates,
                    isInheritedCnv, offspringSegment, Double.NegativeInfinity, ref bestGtState);

                bestChildGtStates.Add(bestGtState);
                currentLogLikelihood += bestLogLikelihood;
            }

            currentLogLikelihood += GetCurrentGtLogLikelihood(model[parent1Id], parent1Segment, parent1GtStates) +
                                    GetCurrentGtLogLikelihood(model[parent2Id], parent2Segment, parent2GtStates);

            // A NaN or infinite total is treated as negative infinity, which can never exceed the running maximum.
            if (Double.IsNaN(currentLogLikelihood) || Double.IsInfinity(currentLogLikelihood))
            {
                continue;
            }

            if (currentLogLikelihood > maximalLogLikelihood)
            {
                maximalLogLikelihood = currentLogLikelihood;

                AssignMcc(parent1Segment, model[parent1Id], parent1GtStates, parent1CopyNumber);
                AssignMcc(parent2Segment, model[parent2Id], parent2GtStates, parent2CopyNumber);

                for (int i = 0; i < pedigreeInfo.OffspringIds.Count; i++)
                {
                    var selectedGtState = bestChildGtStates[i];
                    if (selectedGtState == null)
                    {
                        continue;
                    }

                    var offspringId = pedigreeInfo.OffspringIds[i];
                    var offspringSegment = canvasSegments[offspringId];
                    AssignMcc(offspringSegment, model[offspringId], selectedGtState, offspringSegment.CopyNumber);
                }
            }
        }
    }
}
/// <summary>
/// Checks that VariantCaller.GetGtLogLikelihoodScore selects the expected genotype index among the
/// copy-number-3 genotypes for two sets of simulated allele observations.
/// </summary>
public void TestGetGtLogLikelihoodScore()
{
    const int simulatedCn = 3;

    var copyNumberModel = new HaplotypeCopyNumberModelFactory()
        .CreateModel(numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);

    // Candidate genotypes restricted to those whose total copy number equals the simulated one.
    var gtModelCounts = PedigreeInfo.GeneratePhasedGenotype(numCnStates: 5)
        .Where(gt => gt.TotalCopyNumber == simulatedCn)
        .Select(gt => gt.PhasedGenotype)
        .ToList();

    // Case 1: heavily imbalanced observations (e.g. 1 vs 73).
    // variant caller only calls MCC, only upper triangle of CN genotypes is selected -
    // i.e. CNa=3,CNb=0 from [CNa=3,CNb=0,CNa=0,CNb=3]
    var imbalancedObservations = new Balleles(new List<Ballele>
    {
        new Ballele(1, 1, 73),
        new Ballele(100, 2, 74),
        new Ballele(200, 1, 76),
        new Ballele(300, 0, 74),
        new Ballele(400, 1, 75),
        new Ballele(500, 2, 74)
    });
    var expectedThreeZero = new PhasedGenotype(3, 0);
    int? selectedGtState = null;

    VariantCaller.GetGtLogLikelihoodScore(imbalancedObservations, gtModelCounts, ref selectedGtState, copyNumberModel);

    Assert.Equal(gtModelCounts.IndexOf(expectedThreeZero), selectedGtState);

    // Case 2: observations at roughly a 1:2 ratio (e.g. 23 vs 53); the expected selection is CNa=2,CNb=1.
    var oneToTwoObservations = new Balleles(new List<Ballele>
    {
        new Ballele(1, 23, 53),
        new Ballele(100, 22, 54),
        new Ballele(200, 25, 46),
        new Ballele(300, 24, 50),
        new Ballele(400, 26, 51),
        new Ballele(500, 24, 51)
    });
    var expectedTwoOne = new PhasedGenotype(2, 1);
    selectedGtState = null;

    VariantCaller.GetGtLogLikelihoodScore(oneToTwoObservations, gtModelCounts, ref selectedGtState, copyNumberModel);

    Assert.Equal(gtModelCounts.IndexOf(expectedTwoOne), selectedGtState);
}
/// <summary>
/// Scans the genotypes available for the child's copy number and keeps the one with the highest
/// log likelihood among those that are pedigree-consistent with both parents. Candidates are only
/// evaluated when <paramref name="isInheritedCnv"/> is true.
/// </summary>
/// <param name="copyNumberModel">Model used to evaluate each candidate genotype.</param>
/// <param name="childCopyNumber">Copy number of the child; used to index _genotypes.</param>
/// <param name="parent1GtStates">Genotype assumed for the first parent.</param>
/// <param name="parent2GtStates">Genotype assumed for the second parent.</param>
/// <param name="isInheritedCnv">When false, every candidate is skipped and the inputs are returned unchanged.</param>
/// <param name="canvasSegment">Child segment supplying the b-allele observations.</param>
/// <param name="bestLogLikelihood">Starting value that a candidate must strictly exceed to be selected.</param>
/// <param name="bestGtState">Set to the selected genotype; left untouched when no candidate is selected.</param>
/// <returns>The highest log likelihood found, or the starting value when no candidate exceeded it.</returns>
private double GetProbandLogLikelihood(ICopyNumberModel copyNumberModel, int childCopyNumber, PhasedGenotype parent1GtStates, PhasedGenotype parent2GtStates, bool isInheritedCnv, CanvasSegment canvasSegment, double bestLogLikelihood, ref PhasedGenotype bestGtState)
{
    foreach (var candidateGtState in _genotypes[childCopyNumber])
    {
        bool isEligible = IsGtPedigreeConsistent(parent1GtStates, candidateGtState)
                          && IsGtPedigreeConsistent(parent2GtStates, candidateGtState)
                          && isInheritedCnv;
        if (!isEligible)
        {
            continue;
        }

        double candidateLogLikelihood = copyNumberModel.GetGenotypeLogLikelihood(canvasSegment.Balleles, candidateGtState);

        // Strict '>' keeps the first candidate on ties and never selects a NaN likelihood.
        if (candidateLogLikelihood > bestLogLikelihood)
        {
            bestLogLikelihood = candidateLogLikelihood;
            bestGtState = candidateGtState;
        }
    }

    return bestLogLikelihood;
}
/// <summary>
/// Computes the log likelihood of the observed allele counts under a phased genotype by summing
/// per-locus log likelihoods over <c>gtObservedCounts.GetTruncatedAlleleCounts()</c>.
/// </summary>
/// <param name="gtObservedCounts">Observed allele counts to evaluate.</param>
/// <param name="gtModelCount">Phased genotype (CopyNumberA / CopyNumberB) whose likelihood is evaluated.</param>
/// <returns>
/// The sum of per-locus log likelihoods, each floored at log(1 / Double.MaxValue) so that a single
/// zero-likelihood locus cannot drive the total to negative infinity. Returns 0 when there are no loci.
/// </returns>
public double GetGenotypeLogLikelihood(Balleles gtObservedCounts, PhasedGenotype gtModelCount)
{
    // Per-locus floor, expressed in LOG space. It must be compared against a log likelihood:
    // comparing it against the raw likelihood (which is never negative) would make the floor a no-op.
    double minLogLikelihood = Math.Log(1.0 / Double.MaxValue);

    // The values below depend only on the genotype, so they are computed once instead of per locus.
    int copyNumberA = gtModelCount.CopyNumberA;
    int copyNumberB = gtModelCount.CopyNumberB;
    int numHapsNonZero = (copyNumberA > 0 ? 1 : 0) + (copyNumberB > 0 ? 1 : 0);
    int totalCN = copyNumberA + copyNumberB;
    double logErrorProb = Math.Log(0.01);
    double logNoErrorProb = Math.Log(.99);
    // If both haplotypes have non-zero depth and the locus is non-ref, a locus has a prior prob of 1/3 of being hom,
    // assuming a well-mixed population. We could adjust for observed het:hom, but we do not at this time.
    // Of course, if only one haplotype has non-zero depth, it must be hom.
    double priorFactorHom = numHapsNonZero == 2 ? 0.5 * (1.0 / 3.0) : 1.0;

    double currentLogLikelihood = 0;
    foreach (var gtCount in gtObservedCounts.GetTruncatedAlleleCounts())
    {
        int rowId = Math.Min(gtCount.Item1, _maxCoverage - 1);
        int colId = Math.Min(gtCount.Item2, _maxCoverage - 1);
        // limit the total read count to _maxAlleleCounts before indexing _totalAlleleCountsDistribution
        int totalReads = Math.Min(rowId + colId, _maxAlleleCounts);
        double likelihoodThisLocus = 0;

        // the observations can arise from a het locus, if both copy numbers are positive
        if (numHapsNonZero == 2)
        {
            // Given a variant locus with two haplotypes, we have a roughly 2/3 chance of it being het.
            // Alleles have 50:50 chance of being 'A' or 'B'.
            // We ignore error terms, as they should have a negligible impact here.
            var alleleDistribution = _alleleDistribution[copyNumberA][copyNumberB];
            likelihoodThisLocus += 1.0 / 3.0 * (
                alleleDistribution.Item1[rowId] * alleleDistribution.Item2[colId] +
                alleleDistribution.Item1[colId] * alleleDistribution.Item2[rowId]);
        }

        // they can also arise from a hom locus in various ways
        if (numHapsNonZero > 0)
        {
            // Split the likelihood into two parts:
            // First, the probability of getting the observed total number of reads, given the total copy number
            double probTotalReadDepth = _totalAlleleCountsDistribution[totalCN][totalReads];

            // Second, the probability of the observed per-allele read counts assuming one of the alleles is an error.
            // The calculation here is simply binomial, in log space
            double logCombinations = LogCombinations(rowId, colId);
            double logProbCountAErrors = logCombinations + rowId * logErrorProb + colId * logNoErrorProb;
            double logProbCountBErrors = logCombinations + colId * logErrorProb + rowId * logNoErrorProb;

            likelihoodThisLocus += priorFactorHom * probTotalReadDepth *
                                   (Math.Exp(logProbCountAErrors) + Math.Exp(logProbCountBErrors));
        }
        else
        {
            // uses alleleStateZeroCorrector to enable non-zero likelihoods
            likelihoodThisLocus = _totalAlleleCountsDistribution[0][totalReads];
        }

        // Floor in log space: Math.Log(0) is negative infinity, which the floor replaces with minLogLikelihood.
        currentLogLikelihood += Math.Max(minLogLikelihood, Math.Log(likelihoodThisLocus));
    }

    return currentLogLikelihood;
}
/// <summary>
/// Returns the log likelihood of the segment's Balleles under the given genotype,
/// as computed by the supplied copy number model.
/// </summary>
/// <param name="copyNumberModel">Model that evaluates the genotype likelihood.</param>
/// <param name="canvasSegment">Segment supplying the b-allele observations.</param>
/// <param name="gtStates">Genotype to evaluate.</param>
private static double GetCurrentGtLogLikelihood(ICopyNumberModel copyNumberModel, CanvasSegment canvasSegment, PhasedGenotype gtStates) =>
    copyNumberModel.GetGenotypeLogLikelihood(canvasSegment.Balleles, gtStates);
/// <summary>
/// Builds a Genotype from a phased genotype, using the sum of its two haplotype copy numbers
/// as the total copy number.
/// </summary>
/// <param name="phasedGenotype">Phased genotype to wrap.</param>
/// <returns>A new Genotype holding the total copy number and the phased genotype.</returns>
public static Genotype Create(PhasedGenotype phasedGenotype)
{
    int totalCopyNumber = phasedGenotype.CopyNumberA + phasedGenotype.CopyNumberB;
    return new Genotype(totalCopyNumber, phasedGenotype);
}
/// <summary>
/// Initializes a Genotype with the given total copy number and phased genotype.
/// </summary>
/// <param name="totalCopyNumber">Value stored in TotalCopyNumber.</param>
/// <param name="phasedGenotype">Value stored in PhasedGenotype.</param>
private Genotype(int totalCopyNumber, PhasedGenotype phasedGenotype)
{
    this.PhasedGenotype = phasedGenotype;
    this.TotalCopyNumber = totalCopyNumber;
}