/// <summary>
/// Setup: build the list of candidate ploidy models. One SegmentPloidy is created for every
/// (copy number, major chromosome count) pair with copy number in 0..MaximumCopyNumber and
/// major chromosome count at least half the copy number; each is appended to _allPloidies
/// with its list position stored as Index.
/// </summary>
public void InitializePloidies()
{
    Console.WriteLine("{0} Initialize ploidy models...", DateTime.Now);
    _allPloidies = new List<SegmentPloidy>();

    // The return value of this call is not used here; the call itself is retained unchanged.
    Utilities.EstimateDiploidMAF(2, MeanCoverage);

    for (int copies = 0; copies <= MaximumCopyNumber; copies++)
    {
        // Walk the major count downward from "every copy on one haplotype" to the balanced split.
        int major = copies;
        while (2 * major >= copies)
        {
            SegmentPloidy model = new SegmentPloidy
            {
                CopyNumber = copies,
                MajorChromosomeCount = major,
                Index = _allPloidies.Count
            };
            _allPloidies.Add(model);

            if (copies == 0)
            {
                model.MinorAlleleFrequency = 0.01; // should reflect sequencing error rate
            }
            else if (2 * major == copies)
            {
                // Balanced allele counts: use the coverage-dependent estimate rather than the nominal value.
                model.MinorAlleleFrequency = Utilities.EstimateDiploidMAF(copies, MeanCoverage);
            }
            else
            {
                // Fraction of copies on the major haplotype, folded so the stored value never exceeds 0.5.
                float majorFraction = major / (float)copies;
                model.MinorAlleleFrequency = majorFraction < 0.5 ? majorFraction : 1 - majorFraction;
            }

            major--;
        }
    }

    Console.WriteLine("{0} Ploidy models prepared.", DateTime.Now);
}
/// <summary>
/// Setup: enumerate every (copy number, major chromosome count) combination up to
/// MaximumCopyNumber and register a SegmentPloidy for each one in AllPloidies.
/// The ID of each entry is its position in AllPloidies.
/// </summary>
public void InitializePloidies()
{
    Console.WriteLine("{0} Initialize ploidy models...", DateTime.Now);
    this.AllPloidies = new List<SegmentPloidy>();

    // The result of this call was never read; the call itself is retained unchanged.
    CanvasCommon.Utilities.EstimateDiploidMAF(2, this.MeanCoverage);

    for (int copyNumber = 0; copyNumber <= MaximumCopyNumber; copyNumber++)
    {
        // The major haplotype carries at least half of the copies (rounded up).
        int smallestMajorCount = (copyNumber + 1) / 2;
        for (int majorCount = copyNumber; majorCount >= smallestMajorCount; majorCount--)
        {
            SegmentPloidy entry = new SegmentPloidy
            {
                CopyNumber = copyNumber,
                MajorChromosomeCount = majorCount,
                ID = AllPloidies.Count
            };
            AllPloidies.Add(entry);

            if (copyNumber == 0)
            {
                entry.MinorAlleleFrequency = 0.01; // should reflect sequencing error rate
                continue;
            }

            bool isBalanced = (2 * majorCount == copyNumber);
            if (isBalanced)
            {
                // Balanced allele counts: use the coverage-dependent estimate.
                entry.MinorAlleleFrequency = CanvasCommon.Utilities.EstimateDiploidMAF(copyNumber, this.MeanCoverage);
            }
            else
            {
                // Fold the major-haplotype fraction so the stored value never exceeds 0.5.
                float majorFraction = majorCount / (float)copyNumber;
                entry.MinorAlleleFrequency = (majorFraction < 0.5 ? majorFraction : 1 - majorFraction);
            }
        }
    }

    Console.WriteLine("{0} Ploidy models prepared.", DateTime.Now);
}
/// <summary>
/// For every CanvasSegment, pick the SegmentPloidy with the highest posterior probability
/// returned by GaussianMixtureModel.EMComputePosteriorProbs and copy its copy number and
/// major chromosome count onto the segment. When no ploidy has a non-zero probability,
/// the copy number is derived from coverage alone.
/// </summary>
void AssignPloidyCallsGaussianMixture()
{
    // Placeholder MAF handed to the model for segments with (almost) no variant alleles.
    const double dummyMAF = -1;

    foreach (CanvasSegment segment in this.Segments)
    {
        // Fold each variant frequency onto [0, 0.5] to obtain minor allele frequencies.
        List<double> foldedFrequencies = new List<double>();
        foreach (float frequency in segment.VariantFrequencies)
        {
            if (frequency > 0.5)
            {
                foldedFrequencies.Add(1 - frequency);
            }
            else
            {
                foldedFrequencies.Add(frequency);
            }
        }

        // At least 10 variant frequencies are required before the MAF is trusted.
        bool hasEnoughVariants = foldedFrequencies.Count >= 10;
        double medianCoverage = CanvasCommon.Utilities.Median(segment.Counts);
        double medianMAF = hasEnoughVariants ? Utilities.Median(foldedFrequencies) : dummyMAF;

        Dictionary<SegmentPloidy, double> posteriors =
            GaussianMixtureModel.EMComputePosteriorProbs(AllPloidies, medianMAF, medianCoverage);

        // Keep the first ploidy that attains the highest probability.
        SegmentPloidy winner = null;
        double winningProbability = 0;
        foreach (SegmentPloidy candidate in AllPloidies)
        {
            double probability = posteriors[candidate];
            if (winner != null && !(probability > winningProbability))
            {
                continue;
            }
            winningProbability = probability;
            winner = candidate;
        }

        if (winningProbability == 0)
        {
            // Sanity-check: nothing had probability > 0, so fall back to the simplest possible
            // thing and call purely on coverage.
            segment.CopyNumber = (int)Math.Round(2 * medianCoverage / this.DiploidCoverage);
            segment.MajorChromosomeCount = null;
            continue;
        }

        segment.CopyNumber = winner.CopyNumber;
        if (hasEnoughVariants)
        {
            segment.MajorChromosomeCount = winner.MajorChromosomeCount;
        }
        else
        {
            // Don't assign MCC if we don't have variant allele frequencies.
            segment.MajorChromosomeCount = null;
        }
    }
}
/// <summary>
/// Assign a copy number and major chromosome count to every segment in _allSegments by
/// choosing the ploidy in _allPloidies whose (MixedCoverage, MixedMinorAlleleFrequency)
/// lies nearest to the segment's (median coverage, median MAF). The best and second-best
/// squared distances are stored on the segment as ModelDistance and RunnerUpModelDistance.
/// </summary>
/// <param name="model">Coverage model handed to InitializeModelPoints before matching.</param>
private void AssignPloidyCallsDistance(CoverageModel model)
{
    InitializeModelPoints(model);

    foreach (CanvasSegment segment in _allSegments)
    {
        // Fold each B-allele frequency onto [0, 0.5].
        List<double> foldedFrequencies = new List<double>();
        foreach (float frequency in segment.Balleles.Frequencies)
        {
            foldedFrequencies.Add(frequency > 0.5 ? 1 - frequency : frequency);
        }

        int densityCutoff = segment.Length / MedianHetSnpsDistance / 2;
        double coverage = Utilities.Median(segment.Counts);

        // The MAF term is used only when the segment has at least 10 frequencies and at
        // least densityCutoff of them.
        bool useMaf = foldedFrequencies.Count >= Math.Max(10, densityCutoff);
        double maf = useMaf ? Utilities.Median(foldedFrequencies) : -1;

        SegmentPloidy closest = null;
        double closestDistance = double.MaxValue;
        double runnerUpDistance = double.MaxValue;
        foreach (SegmentPloidy candidate in _allPloidies)
        {
            double coverageDelta = (candidate.MixedCoverage - coverage) * _coverageWeightingFactor;
            double distance = coverageDelta * coverageDelta;
            if (useMaf)
            {
                double mafDelta = candidate.MixedMinorAlleleFrequency - maf;
                distance += mafDelta * mafDelta;
            }

            if (!(distance < closestDistance))
            {
                // Not a new best; it may still be the runner-up.
                if (distance < runnerUpDistance)
                {
                    runnerUpDistance = distance;
                }
                continue;
            }

            runnerUpDistance = closestDistance;
            closestDistance = distance;
            closest = candidate;
        }

        if (closest != null)
        {
            segment.CopyNumber = closest.CopyNumber;
            segment.MajorChromosomeCount = closest.MajorChromosomeCount;
        }
        segment.ModelDistance = closestDistance;
        segment.RunnerUpModelDistance = runnerUpDistance;

        // NOTE(review): this reset uses a fixed threshold of 10, whereas the MAF term above is
        // gated on Math.Max(10, densityCutoff) - confirm the difference is intended.
        if (foldedFrequencies.Count < 10)
        {
            segment.MajorChromosomeCount = null; // Don't assign MCC if we don't have variant allele frequencies
        }
    }
}
/// <summary>
/// Assign a copy number and major chromosome count to every segment in Segments by choosing
/// the ploidy in AllPloidies whose (MixedCoverage, MixedMinorAlleleFrequency) lies nearest to
/// the segment's (median coverage, median MAF). The best and second-best squared distances
/// are stored on the segment as ModelDistance and RunnerUpModelDistance.
/// </summary>
/// <param name="model">Coverage model handed to InitializeModelPoints before matching.</param>
/// <param name="segments">Not read by this method; kept for signature compatibility.</param>
/// <param name="medianVariantCoverage">Not read by this method; kept for signature compatibility.</param>
private void AssignPloidyCallsDistance(CoverageModel model, List<SegmentInfo> segments, int medianVariantCoverage)
{
    // The returned model points are not needed here, so the result is not stored.
    // NOTE(review): presumably this call prepares the Mixed* values read from AllPloidies below - confirm.
    InitializeModelPoints(model);

    foreach (CanvasSegment segment in this.Segments)
    {
        // Compute (MAF, Coverage) for this segment, folding each frequency onto [0, 0.5].
        List<double> MAF = new List<double>();
        foreach (float VF in segment.VariantFrequencies)
        {
            MAF.Add(VF > 0.5 ? 1 - VF : VF);
        }

        int expectedSnpDensityCutoff = (segment.End - segment.Begin) / MedianHetSnpsDistance / 2;
        double medianCoverage = CanvasCommon.Utilities.Median(segment.Counts);

        // Evaluated once per segment: it does not change while the ploidies are scanned.
        bool useMAF = MAF.Count >= Math.Max(10, expectedSnpDensityCutoff);
        double medianMAF = -1;
        if (useMAF)
        {
            medianMAF = Utilities.Median(MAF);
        }

        SegmentPloidy bestPloidy = null;
        double bestDistance = double.MaxValue;
        double secondBestDistance = double.MaxValue;
        foreach (SegmentPloidy ploidy in AllPloidies)
        {
            double diff = (ploidy.MixedCoverage - medianCoverage) * CoverageWeightingFactor;
            double distance = diff * diff;
            if (useMAF)
            {
                diff = ploidy.MixedMinorAlleleFrequency - medianMAF;
                distance += diff * diff;
            }

            if (distance < bestDistance)
            {
                secondBestDistance = bestDistance;
                bestDistance = distance;
                bestPloidy = ploidy;
            }
            else if (distance < secondBestDistance)
            {
                secondBestDistance = distance;
            }
        }

        // bestPloidy stays null when AllPloidies is empty or no distance compares below
        // double.MaxValue (e.g. NaN); leave the segment's call untouched instead of dereferencing null.
        if (bestPloidy != null)
        {
            segment.CopyNumber = bestPloidy.CopyNumber;
            segment.MajorChromosomeCount = bestPloidy.MajorChromosomeCount;
        }
        segment.ModelDistance = bestDistance;
        segment.RunnerUpModelDistance = secondBestDistance;

        // NOTE(review): this reset uses a fixed threshold of 10, whereas the MAF term above is
        // gated on Math.Max(10, expectedSnpDensityCutoff) - confirm the difference is intended.
        if (MAF.Count < 10)
        {
            segment.MajorChromosomeCount = null; // Don't assign MCC if we don't have variant allele frequencies
        }
    }
}