/// <summary>
/// Make sure a component doesn't "invade" the other components.
/// For each model point, the other component with the highest Omega-weighted Sigma(...) score
/// (evaluated at this point's MAF and coverage, i.e. treating the point as if it were a segment)
/// is looked up. When the determinants of the two 2x2 covariance matrices differ by a factor of
/// four or more, the matrix with the larger determinant is passed to Scale2DMatrix with 0.8 and
/// the other one with 1.1.
/// </summary>
/// <param name="modelPoints">Components to compare pairwise; their Ploidy.Sigma matrices are handed to Scale2DMatrix.</param>
private void EMScaleCovariancesPairwise(List<ModelPoint> modelPoints)
{
    const double minDeterminant = 1E-7;
    const double minDeterminantRatio = 4;
    const double shrinkFactor = 0.8;
    const double growFactor = 1.1;

    foreach (ModelPoint current in modelPoints)
    {
        // Find the other component that scores the current point highest.
        ModelPoint strongestOther = null;
        double strongestScore = 0;
        foreach (ModelPoint other in modelPoints)
        {
            if (other == current)
            {
                continue;
            }

            // pretend the current point is a segment
            double score = other.Ploidy.Omega * Sigma(current.MAF, current.Coverage, other.Ploidy.Mu, other.Ploidy.Sigma);
            if (score > strongestScore)
            {
                strongestScore = score;
                strongestOther = other;
            }
        }

        // No other component produced a positive score: nothing to rebalance.
        if (strongestScore <= 0)
        {
            continue;
        }

        double[][] currentSigma = current.Ploidy.Sigma;
        double[][] otherSigma = strongestOther.Ploidy.Sigma;
        double currentDet = currentSigma[0][0] * currentSigma[1][1] - currentSigma[0][1] * currentSigma[1][0];
        double otherDet = otherSigma[0][0] * otherSigma[1][1] - otherSigma[0][1] * otherSigma[1][0];

        // Skip (near-)degenerate covariances.
        if (currentDet <= minDeterminant || otherDet <= minDeterminant)
        {
            continue;
        }

        bool currentIsLarger = currentDet > otherDet;
        double detRatio = currentIsLarger ? currentDet / otherDet : otherDet / currentDet;
        if (detRatio < minDeterminantRatio)
        {
            continue;
        }

        // Shrink the larger covariance first, then grow the smaller one.
        double[][] toShrink = currentIsLarger ? currentSigma : otherSigma;
        double[][] toGrow = currentIsLarger ? otherSigma : currentSigma;
        Scale2DMatrix(toShrink, shrinkFactor);
        Scale2DMatrix(toGrow, growFactor);
    }
}
/// <summary>
/// Initialize one model point per entry of AllPloidies, using the projected mean coverage for the
/// ploidy's copy number and the ploidy's own MinorAlleleFrequency directly (no purity mixing).
/// The resulting values are also written back to the SegmentPloidy object as
/// MixedCoverage / MixedMinorAlleleFrequency.
/// </summary>
/// <param name="model">Coverage model; only its DiploidCoverage is read.</param>
/// <returns>The model points, in the same order as AllPloidies.</returns>
protected List<ModelPoint> InitializeModelPoints(CoverageModel model)
{
    List<ModelPoint> modelPoints = new List<ModelPoint>();
    double[] mu = GetProjectedMeanCoverage(model.DiploidCoverage);

    // Update the parameters in each SegmentPloidy object, and construct corresponding model points
    foreach (SegmentPloidy ploidy in this.AllPloidies)
    {
        ModelPoint point = new ModelPoint();
        point.Coverage = mu[ploidy.CopyNumber];

        // An undefined (NaN) minor allele frequency is replaced by 0.
        point.MAF = ploidy.MinorAlleleFrequency;
        if (double.IsNaN(point.MAF))
        {
            point.MAF = 0;
        }

        point.Ploidy = ploidy;
        point.CN = ploidy.CopyNumber;
        modelPoints.Add(point);

        ploidy.MixedMinorAlleleFrequency = point.MAF;
        ploidy.MixedCoverage = point.Coverage;
    }

    return modelPoints;
}
/// <summary>
/// Initialize model points given a somatic purity model, then keep only the model points
/// that fit the observed segments best.
/// Each model point is scored by the 15th percentile of its GetModelDistance values to all
/// segments with a non-negative MAF; the numClusters points with the lowest scores are returned,
/// with Cluster set to 1..numClusters in order of increasing score.
/// </summary>
/// <param name="segments">Observed segments; entries with MAF below 0 are ignored when scoring.</param>
/// <param name="coverage">Diploid coverage assigned to the purity model.</param>
/// <param name="percentPurity">Purity as a percentage; divided by 100 to populate the model's Purity.</param>
/// <param name="numClusters">
/// Number of model points to select. If it exceeds the number of available model points,
/// all model points are returned.
/// </param>
/// <returns>The selected model points, best fit first.</returns>
protected List<ModelPoint> InitializeModelPoints(List<SegmentInfo> segments, double coverage, int percentPurity, int numClusters)
{
    CoveragePurityModel model = new CoveragePurityModel();
    model.DiploidCoverage = coverage;
    model.Purity = percentPurity / 100f;

    // The purity-mixed coverage/MAF of every ploidy is exactly what the
    // CoveragePurityModel overload computes, so reuse it instead of duplicating the loop.
    List<ModelPoint> modelPoints = InitializeModelPoints(model);

    // Only segments with a MAF estimate can be scored; the filter does not depend on the model point.
    List<SegmentInfo> scorableSegments = new List<SegmentInfo>();
    foreach (SegmentInfo info in segments)
    {
        if (info.MAF >= 0)
        {
            scorableSegments.Add(info);
        }
    }

    // estimate distance between each model point and segments
    List<double> modelPointsScore = new List<double>(modelPoints.Count);
    foreach (ModelPoint modelPoint in modelPoints)
    {
        if (scorableSegments.Count == 0)
        {
            // Nothing to compare against: give every point the worst possible score so that the
            // (stable) ranking below falls back to the original model point order.
            modelPointsScore.Add(double.PositiveInfinity);
            continue;
        }

        List<double> distanceList = new List<double>(scorableSegments.Count);
        foreach (SegmentInfo info in scorableSegments)
        {
            distanceList.Add(GetModelDistance(info.Coverage, modelPoint.Coverage, info.MAF, modelPoint.MAF));
        }
        distanceList.Sort();

        // use model points with good fit to observed values
        double v15th_percentile = distanceList[Convert.ToInt32(distanceList.Count * 0.15)];
        modelPointsScore.Add(v15th_percentile);
    }

    // Indices of the model points ordered by increasing score (OrderBy is stable, so ties keep their order)
    List<int> rankedIndices = modelPointsScore
        .Select((score, index) => new KeyValuePair<double, int>(score, index))
        .OrderBy(pair => pair.Key)
        .Select(pair => pair.Value)
        .ToList();

    // Never ask for more clusters than there are model points.
    int selectedCount = Math.Min(numClusters, rankedIndices.Count);
    List<ModelPoint> selectedModelPoints = new List<ModelPoint>();
    for (int i = 0; i < selectedCount; i++)
    {
        ModelPoint selected = modelPoints[rankedIndices[i]];
        selected.Cluster = i + 1;
        selectedModelPoints.Add(selected);
    }

    return selectedModelPoints;
}
/// <summary>
/// Initialize model points given expected ploidy and purity values.
/// One model point is built per entry of AllPloidies. Its coverage and MAF are purity-weighted
/// mixes of the ploidy's own values and the diploid values, and the mixed values are also
/// written back to the SegmentPloidy object.
/// </summary>
/// <param name="model">Supplies DiploidCoverage and Purity.</param>
/// <returns>The model points, in the same order as AllPloidies.</returns>
protected List<ModelPoint> InitializeModelPoints(CoveragePurityModel model)
{
    var diploidCoverage = model.DiploidCoverage;
    double[] projectedCoverage = GetProjectedMeanCoverage(diploidCoverage);
    double diploidMAF = this.AllPloidies[3].MinorAlleleFrequency; /// %%% Magic number!

    // Fractions are kept in the type of model.Purity so the arithmetic below is unchanged.
    var tumorFraction = model.Purity;
    var normalFraction = 1 - tumorFraction;

    var result = new List<ModelPoint>();

    // Update the parameters in each SegmentPloidy object, and construct the corresponding model points
    foreach (SegmentPloidy state in this.AllPloidies)
    {
        double pureMAF = state.MinorAlleleFrequency;
        var tumorCopies = tumorFraction * state.CopyNumber;
        var totalCopies = tumorCopies + normalFraction * 2;

        double mixedMAF;
        bool majorIsHalfOfCopyNumber = state.MajorChromosomeCount * 2 == state.CopyNumber;
        if (majorIsHalfOfCopyNumber)
        {
            mixedMAF = (tumorCopies * pureMAF + normalFraction * 2 * diploidMAF) / totalCopies;
            if (double.IsNaN(mixedMAF))
            {
                mixedMAF = 0;
            }
        }
        else
        {
            // Here the normal fraction is weighted by 1 instead of 2 * diploidMAF.
            mixedMAF = (tumorCopies * pureMAF + normalFraction * 1) / totalCopies;
        }

        ModelPoint point = new ModelPoint
        {
            Coverage = tumorFraction * projectedCoverage[state.CopyNumber] + normalFraction * diploidCoverage,
            MAF = mixedMAF,
            Ploidy = state,
            CN = state.CopyNumber
        };
        result.Add(point);

        state.MixedMinorAlleleFrequency = point.MAF;
        state.MixedCoverage = point.Coverage;
    }

    return result;
}
/// <summary>
/// Initialize model points by subsampling from existing segment Coverage and MAF values.
/// Use distanceThreshold to ensure that both large and small cluster components get subsampled:
/// a randomly drawn segment is accepted only if its GetModelDistance to the previously accepted
/// segment exceeds the threshold, or after too many consecutive rejections.
/// The same segment may be accepted more than once.
/// </summary>
/// <param name="segments">Candidate segments; only those with Cluster != -1 and MAF >= 0 are sampled.</param>
/// <param name="numClusters">Number of model points to create.</param>
/// <param name="distanceThreshold">Minimum distance to the previously accepted segment.</param>
/// <returns>numClusters model points with Cluster set to 1..numClusters, each with a fresh SegmentPloidy (CopyNumber 2, MajorChromosomeCount 1).</returns>
/// <exception cref="ArgumentOutOfRangeException">No segment is usable for sampling.</exception>
protected List<ModelPoint> InitializeModelPoints(List<SegmentInfo> segments, int numClusters, double distanceThreshold)
{
    // Fraction of the usable segments that may be rejected in a row before the next draw is accepted anyway.
    const double maxRejectedFraction = 0.3;

    List<SegmentInfo> usableSegments = new List<SegmentInfo>();
    foreach (SegmentInfo segment in segments)
    {
        if (segment.Cluster != -1 && segment.MAF >= 0)
        {
            usableSegments.Add(segment);
        }
    }

    if (usableSegments.Count == 0)
    {
        throw new ArgumentOutOfRangeException("segments", "No segment with an assigned cluster and a non-negative MAF is available to seed model points.");
    }

    Random rnd = new Random();

    // Random.Next(max) draws from [0, max), so every usable segment (including the first) can be
    // picked and a single usable segment is handled correctly.
    int lastIndex = rnd.Next(usableSegments.Count);
    List<SegmentInfo> usedSegments = new List<SegmentInfo>();
    usedSegments.Add(usableSegments[lastIndex]);

    int attempts = 0;
    while (usedSegments.Count < numClusters)
    {
        int newIndex = rnd.Next(usableSegments.Count);
        attempts++;

        SegmentInfo previous = usableSegments[lastIndex];
        SegmentInfo candidate = usableSegments[newIndex];
        double distance = GetModelDistance(previous.Coverage, candidate.Coverage, previous.MAF, candidate.MAF);

        // escape outlier minima: give up on the threshold after too many consecutive rejections
        bool tooManyRejections = (double)attempts / usableSegments.Count > maxRejectedFraction;
        if (distance > distanceThreshold || tooManyRejections)
        {
            usedSegments.Add(candidate);
            lastIndex = newIndex;
            attempts = 0;
        }
    }

    // Initialize model points with coverage and MAF values from subsampled segments
    List<ModelPoint> modelPoints = new List<ModelPoint>();
    for (int i = 0; i < numClusters; i++)
    {
        SegmentPloidy ploidy = new SegmentPloidy();
        ploidy.CopyNumber = 2;
        ploidy.MajorChromosomeCount = 1;

        ModelPoint point = new ModelPoint();
        point.Coverage = usedSegments[i].Coverage;
        point.MAF = usedSegments[i].MAF;
        point.Ploidy = ploidy;
        point.Cluster = i + 1;
        modelPoints.Add(point);
    }

    return modelPoints;
}