/// <summary>
/// Setup: Model various copy ploidies.
/// Rebuilds AllPloidies with one entry per (copy number, major chromosome count) pair, where the
/// copy number runs from 0 to MaximumCopyNumber and the major chromosome count runs from the copy
/// number down to half of the copy number (inclusive). Each entry's ID is its index in AllPloidies.
/// </summary>
public void InitializePloidies()
{
    Console.WriteLine("{0} Initialize ploidy models...", DateTime.Now);
    this.AllPloidies = new List<SegmentPloidy>();
    for (int copyNumber = 0; copyNumber <= MaximumCopyNumber; copyNumber++)
    {
        // Only enumerate states in which the major allele carries at least half of the copies.
        for (int majorCount = copyNumber; majorCount * 2 >= copyNumber; majorCount--)
        {
            SegmentPloidy ploidy = new SegmentPloidy();
            ploidy.CopyNumber = copyNumber;
            ploidy.MajorChromosomeCount = majorCount;
            ploidy.ID = AllPloidies.Count; // index the entry will occupy once added
            AllPloidies.Add(ploidy);

            if (copyNumber == 0)
            {
                ploidy.MinorAlleleFrequency = 0.01; // should reflect sequencing error rate
                continue;
            }

            if (majorCount * 2 == copyNumber)
            {
                // Balanced state (equal major and minor counts): use the coverage-dependent
                // estimate from the helper rather than the nominal 0.5.
                ploidy.MinorAlleleFrequency = CanvasCommon.Utilities.EstimateDiploidMAF(copyNumber, this.MeanCoverage);
            }
            else
            {
                // Here majorCount * 2 > copyNumber, so the major fraction is at least 0.5 and
                // the minor allele frequency is simply its complement.
                float majorFraction = majorCount / (float)copyNumber;
                ploidy.MinorAlleleFrequency = 1 - majorFraction;
            }
        }
    }
    Console.WriteLine("{0} Ploidy models prepared.", DateTime.Now);
}
/// <summary>
/// Initialize model points by subsampling from existing segment Coverage and MAF values.
/// Use distanceThreshold to ensure that both large and small cluster components get subsampled.
/// </summary>
/// <param name="segments">Candidate segments; only those with Cluster != -1 and a non-negative MAF are sampled.</param>
/// <param name="numClusters">Number of model points to create.</param>
/// <param name="distanceThreshold">
/// Minimum model distance (as computed by GetModelDistance) between consecutively chosen segments.
/// The threshold is waived once the number of failed draws exceeds 30% of the usable segment count.
/// </param>
/// <returns>
/// A list of numClusters model points whose Coverage and MAF are copied from the sampled segments,
/// each with a placeholder ploidy (CopyNumber 2, MajorChromosomeCount 1) and Cluster set to 1..numClusters.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">No segment in segments is usable for sampling.</exception>
protected List<ModelPoint> InitializeModelPoints(List<SegmentInfo> segments, int numClusters, double distanceThreshold)
{
    List<SegmentInfo> usableSegments = new List<SegmentInfo>();
    foreach (SegmentInfo segment in segments)
    {
        if (segment.Cluster != -1 && segment.MAF >= 0)
        {
            usableSegments.Add(segment);
        }
    }

    // Fail with a descriptive message instead of an opaque range error from Random/List indexing.
    if (usableSegments.Count == 0)
    {
        throw new ArgumentOutOfRangeException("segments", "No usable segments (Cluster != -1 and MAF >= 0) are available to initialize model points.");
    }

    Random rnd = new Random();
    List<SegmentInfo> usedSegments = new List<SegmentInfo>();

    // Sample from the full index range [0, Count). The lower bound of Random.Next is inclusive,
    // so starting at 1 would never pick the first usable segment and would index out of range
    // when there is only a single usable segment.
    int lastIndex = rnd.Next(usableSegments.Count);
    usedSegments.Add(usableSegments[lastIndex]);

    int counter = 1;
    double attempts = 0;
    while (counter < numClusters)
    {
        int newIndex = rnd.Next(usableSegments.Count);
        attempts += 1.0;
        double distance = GetModelDistance(
            usableSegments[lastIndex].Coverage, usableSegments[newIndex].Coverage,
            usableSegments[lastIndex].MAF, usableSegments[newIndex].MAF);

        // Accept the draw if it is far enough from the previous pick, or give up on the
        // threshold after too many failed draws (escape outlier minima).
        if (distance > distanceThreshold || attempts / usableSegments.Count > 0.3)
        {
            usedSegments.Add(usableSegments[newIndex]);
            counter++;
            lastIndex = newIndex;
            attempts = 0;
        }
    }

    // Initialize model points with coverage and MAF values from subsampled segments
    List<ModelPoint> modelPoints = new List<ModelPoint>();
    for (int i = 0; i < numClusters; i++)
    {
        ModelPoint point = new ModelPoint();
        point.Coverage = usedSegments[i].Coverage;
        point.MAF = usedSegments[i].MAF;

        SegmentPloidy ploidy = new SegmentPloidy();
        ploidy.CopyNumber = 2;
        ploidy.MajorChromosomeCount = 1;
        point.Ploidy = ploidy;

        point.Cluster = i + 1; // cluster labels are 1-based
        modelPoints.Add(point);
    }
    return modelPoints;
}