Represents a model point in (MAF, Coverage) space
Пример #1
0
        /// <summary>
        /// Keeps a component from "invading" its neighbours. For every model point, the other
        /// point with the largest weighted score at this point's (MAF, Coverage) location is
        /// located; when the determinants of the two 2x2 covariance matrices differ by a factor
        /// of four or more, the larger matrix is scaled by 0.8 and the smaller one by 1.1.
        /// </summary>
        /// <param name="modelPoints">Model points whose ploidy covariance matrices are passed to Scale2DMatrix.</param>
        private void EMScaleCovariancesPairwise(List<ModelPoint> modelPoints)
        {
            foreach (var current in modelPoints)
            {
                // Find the strongest competitor: evaluate every other component at the
                // current point's location, treating the current point as if it were a segment.
                // NOTE(review): Sigma(...) is presumably a density evaluation - confirm.
                ModelPoint rival = null;
                double bestScore = 0;
                foreach (var candidate in modelPoints)
                {
                    if (candidate == current)
                    {
                        continue;
                    }

                    double score = candidate.Ploidy.Omega *
                                   Sigma(current.MAF, current.Coverage, candidate.Ploidy.Mu, candidate.Ploidy.Sigma);
                    if (score > bestScore)
                    {
                        bestScore = score;
                        rival     = candidate;
                    }
                }

                // No competitor scored above zero: nothing to rebalance for this point.
                if (bestScore <= 0)
                {
                    continue;
                }

                double[][] ownCov   = current.Ploidy.Sigma;
                double     ownDet   = ownCov[0][0] * ownCov[1][1] - ownCov[0][1] * ownCov[1][0];
                double[][] rivalCov = rival.Ploidy.Sigma;
                double     rivalDet = rivalCov[0][0] * rivalCov[1][1] - rivalCov[0][1] * rivalCov[1][0];

                // Skip (near-)degenerate covariance matrices.
                if (ownDet <= 1E-7 || rivalDet <= 1E-7)
                {
                    continue;
                }

                // Only intervene when one determinant is at least four times the other.
                bool   ownIsLarger = ownDet > rivalDet;
                double ratio       = ownIsLarger ? ownDet / rivalDet : rivalDet / ownDet;
                if (ratio < 4)
                {
                    continue;
                }

                // Shrink the larger component first, then grow the smaller one.
                double[][] toShrink = ownIsLarger ? ownCov : rivalCov;
                double[][] toGrow   = ownIsLarger ? rivalCov : ownCov;
                Scale2DMatrix(toShrink, 0.8);
                Scale2DMatrix(toGrow, 1.1);
            }
        }
Пример #2
0
        /// <summary>
        /// Builds one model point per entry of AllPloidies from the unmixed expectations:
        /// coverage is looked up by copy number in the array returned by
        /// GetProjectedMeanCoverage, and MAF is the ploidy's own minor allele frequency
        /// (NaN is replaced by 0). Each ploidy's MixedMinorAlleleFrequency and MixedCoverage
        /// are updated to the same values.
        /// </summary>
        /// <param name="model">Coverage model; only its DiploidCoverage is read here.</param>
        /// <returns>The model points, in the same order as AllPloidies.</returns>
        protected List<ModelPoint> InitializeModelPoints(CoverageModel model)
        {
            List<ModelPoint> modelPoints = new List<ModelPoint>();

            // Expected coverage, indexed by copy number.
            double[] mu = GetProjectedMeanCoverage(model.DiploidCoverage);

            // Update the parameters in each SegmentPloidy object and build the matching model point.
            foreach (SegmentPloidy ploidy in this.AllPloidies)
            {
                ModelPoint point = new ModelPoint();
                double pureCoverage = mu[ploidy.CopyNumber];
                point.Coverage = pureCoverage;
                double pureMAF = ploidy.MinorAlleleFrequency;
                point.MAF = pureMAF;
                // A ploidy without a defined MAF is placed at MAF 0.
                if (double.IsNaN(point.MAF)) point.MAF = 0;
                point.Ploidy = ploidy;
                modelPoints.Add(point);
                point.CN = ploidy.CopyNumber;
                ploidy.MixedMinorAlleleFrequency = point.MAF;
                ploidy.MixedCoverage = point.Coverage;
            }

            return modelPoints;
        }
Пример #3
0
        /// <summary>
        /// Initialize model points from a purity model built from the given diploid coverage and
        /// purity, then keep the <paramref name="numClusters"/> points that fit the observed
        /// segments best. Fit is measured as the 15th percentile of the model distances between
        /// a model point and all segments that have a non-negative MAF.
        /// </summary>
        /// <param name="segments">Observed segments; only those with a non-negative MAF are scored.</param>
        /// <param name="coverage">Diploid coverage assigned to the purity model.</param>
        /// <param name="percentPurity">Purity as a percentage; divided by 100 before use.</param>
        /// <param name="numClusters">Number of best-fitting model points to return.</param>
        /// <returns>
        /// The selected model points ordered from best to worst fit, with Cluster set to 1..numClusters.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// No segment has a non-negative MAF, or <paramref name="numClusters"/> exceeds the number
        /// of model points.
        /// </exception>
        protected List<ModelPoint> InitializeModelPoints(List<SegmentInfo> segments, double coverage, int percentPurity, int numClusters)
        {
            List<ModelPoint> modelPoints = new List<ModelPoint>();
            CoveragePurityModel model = new CoveragePurityModel();
            model.DiploidCoverage = coverage;
            model.Purity = percentPurity / 100f;

            // Expected coverage, indexed by copy number.
            double[] mu = GetProjectedMeanCoverage(model.DiploidCoverage);
            // NOTE(review): index 3 is a magic number - presumably the normal diploid ploidy; confirm.
            double diploidMAF = this.AllPloidies[3].MinorAlleleFrequency;

            // Update the parameters in each SegmentPloidy object and build the matching model point.
            foreach (SegmentPloidy ploidy in this.AllPloidies)
            {
                ModelPoint point = new ModelPoint();
                double pureCoverage = mu[ploidy.CopyNumber];
                point.Coverage = (model.Purity * pureCoverage) + (1 - model.Purity) * model.DiploidCoverage;
                double pureMAF = ploidy.MinorAlleleFrequency;
                if (ploidy.MajorChromosomeCount * 2 == ploidy.CopyNumber)
                {
                    // Major count is exactly half the copy number: the non-pure fraction contributes the diploid MAF.
                    point.MAF = (model.Purity * ploidy.CopyNumber * pureMAF) + ((1 - model.Purity) * 2 * diploidMAF);
                    point.MAF /= model.Purity * ploidy.CopyNumber + (1 - model.Purity) * 2;
                    // Copy number 0 at purity 1 gives 0/0.
                    if (double.IsNaN(point.MAF)) point.MAF = 0;
                }
                else
                {
                    point.MAF = (model.Purity * ploidy.CopyNumber * pureMAF) + ((1 - model.Purity) * 1);
                    point.MAF /= model.Purity * ploidy.CopyNumber + (1 - model.Purity) * 2;
                }
                point.Ploidy = ploidy;
                modelPoints.Add(point);
                point.CN = ploidy.CopyNumber;
                ploidy.MixedMinorAlleleFrequency = point.MAF;
                ploidy.MixedCoverage = point.Coverage;
            }

            // Validate up front instead of failing half-way through the selection loop.
            if (numClusters > modelPoints.Count)
            {
                throw new ArgumentOutOfRangeException("numClusters", numClusters,
                    "Requested more clusters than there are model points (" + modelPoints.Count + ").");
            }

            // The MAF filter does not depend on the model point, so apply it once.
            List<SegmentInfo> scoredSegments = new List<SegmentInfo>();
            foreach (SegmentInfo info in segments)
            {
                if (info.MAF >= 0)
                    scoredSegments.Add(info);
            }
            if (scoredSegments.Count == 0)
            {
                throw new ArgumentOutOfRangeException("segments",
                    "No segment has a non-negative MAF; model points cannot be scored.");
            }

            // Score each model point by the 15th percentile of its distances to the segments:
            // a low value means a sizeable share of the segments lies close to the point.
            List<double> modelPointsScore = new List<double>();
            foreach (ModelPoint modelPoint in modelPoints)
            {
                List<double> distanceList = new List<double>(scoredSegments.Count);
                foreach (SegmentInfo info in scoredSegments)
                {
                    distanceList.Add(GetModelDistance(info.Coverage, modelPoint.Coverage, info.MAF, modelPoint.MAF));
                }
                distanceList.Sort();
                double v15th_percentile = distanceList[Convert.ToInt32(distanceList.Count * 0.15)];
                modelPointsScore.Add(v15th_percentile);
            }

            // Indices of the model points ordered by ascending score (best fit first).
            List<int> scoresIndex = modelPointsScore
                .Select((x, i) => new KeyValuePair<double, int>(x, i))
                .OrderBy(x => x.Key)
                .Select(x => x.Value)
                .ToList();

            List<ModelPoint> selectedModelPoints = new List<ModelPoint>();
            for (int i = 0; i < numClusters; i++)
            {
                modelPoints[scoresIndex[i]].Cluster = i + 1;
                selectedModelPoints.Add(modelPoints[scoresIndex[i]]);
            }

            return selectedModelPoints;
        }
Пример #4
0
        /// <summary>
        /// Initialize model points given expected ploidy and purity values: one point per entry
        /// of AllPloidies, with coverage and MAF mixed according to model.Purity. Each ploidy's
        /// MixedMinorAlleleFrequency and MixedCoverage are updated to the mixed values.
        /// </summary>
        /// <param name="model">Supplies DiploidCoverage and Purity.</param>
        /// <returns>The model points, in the same order as AllPloidies.</returns>
        protected List<ModelPoint> InitializeModelPoints(CoveragePurityModel model)
        {
            // Expected coverage, indexed by copy number.
            double[] projectedCoverage = GetProjectedMeanCoverage(model.DiploidCoverage);
            // NOTE(review): index 3 is a magic number - presumably the normal diploid ploidy; confirm.
            double referenceMAF = this.AllPloidies[3].MinorAlleleFrequency;

            var pureFraction = model.Purity;
            var otherFraction = 1 - pureFraction;

            var result = new List<ModelPoint>();
            foreach (SegmentPloidy ploidy in this.AllPloidies)
            {
                var copies = ploidy.CopyNumber;
                double unmixedCoverage = projectedCoverage[copies];
                double unmixedMAF = ploidy.MinorAlleleFrequency;

                // Both MAF formulas share the same denominator.
                var denominator = pureFraction * copies + otherFraction * 2;

                double mixedMAF;
                if (ploidy.MajorChromosomeCount * 2 == copies)
                {
                    // Major count is exactly half the copy number: mix in the reference MAF.
                    mixedMAF = ((pureFraction * copies * unmixedMAF) + (otherFraction * 2 * referenceMAF)) / denominator;
                    if (double.IsNaN(mixedMAF))
                    {
                        mixedMAF = 0;
                    }
                }
                else
                {
                    mixedMAF = ((pureFraction * copies * unmixedMAF) + (otherFraction * 1)) / denominator;
                }

                ModelPoint point = new ModelPoint();
                point.Coverage = (pureFraction * unmixedCoverage) + otherFraction * model.DiploidCoverage;
                point.MAF = mixedMAF;
                point.Ploidy = ploidy;
                point.CN = copies;
                result.Add(point);

                // Mirror the mixed expectations back onto the ploidy.
                ploidy.MixedMinorAlleleFrequency = point.MAF;
                ploidy.MixedCoverage = point.Coverage;
            }

            return result;
        }
Пример #5
0
        /// <summary>
        ///  Initialize model points by subsampling from existing segment Coverage and MAF values.
        ///  Use distanceThreshold to ensure that both large and small cluster components get subsampled.
        /// </summary>
        /// <param name="segments">
        /// Candidate segments; only those with Cluster != -1 and a non-negative MAF are sampled.
        /// </param>
        /// <param name="numClusters">Number of model points to create.</param>
        /// <param name="distanceThreshold">
        /// Minimum model distance from the previously chosen segment for a new draw to be accepted.
        /// </param>
        /// <returns>
        /// <paramref name="numClusters"/> model points with Cluster set to 1..numClusters, each
        /// carrying a placeholder ploidy (CopyNumber 2, MajorChromosomeCount 1).
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">No usable segment is available to sample from.</exception>
        protected List<ModelPoint> InitializeModelPoints(List<SegmentInfo> segments, int numClusters, double distanceThreshold)
        {
            List<ModelPoint> modelPoints = new List<ModelPoint>();
            List<SegmentInfo> usableSegments = new List<SegmentInfo>();
            List<SegmentInfo> usedSegments = new List<SegmentInfo>();

            foreach (SegmentInfo segment in segments)
            {
                if (segment.Cluster != -1 && segment.MAF >= 0)
                    usableSegments.Add(segment);
            }

            if (usableSegments.Count == 0)
            {
                throw new ArgumentOutOfRangeException("segments",
                    "No usable segment (Cluster != -1 and MAF >= 0) to sample model points from.");
            }

            Random rnd = new Random();
            // Random.Next(max) draws from [0, max): every usable segment, including the first,
            // can be chosen, and a single usable segment no longer indexes out of range.
            int lastIndex = rnd.Next(usableSegments.Count);
            usedSegments.Add(usableSegments[lastIndex]);
            int counter = 1;
            double attempts = 0;
            while (counter < numClusters)
            {
                int newIndex = rnd.Next(usableSegments.Count);
                attempts += 1.0;
                double distance = GetModelDistance(usableSegments[lastIndex].Coverage, usableSegments[newIndex].Coverage, usableSegments[lastIndex].MAF, usableSegments[newIndex].MAF);
                // Accept a sufficiently distant segment, or give up on the distance requirement once
                // the number of failed draws exceeds 30% of the usable segments (escape outlier minima).
                if (distance > distanceThreshold || attempts / usableSegments.Count > 0.3)
                {
                    usedSegments.Add(usableSegments[newIndex]);
                    counter++;
                    lastIndex = newIndex;
                    attempts = 0;
                }
            }

            // Initialize model points with coverage and MAF values from subsampled segments
            for (int i = 0; i < numClusters; i++)
            {
                ModelPoint point = new ModelPoint();
                point.Coverage = usedSegments[i].Coverage;
                point.MAF = usedSegments[i].MAF;
                SegmentPloidy ploidy = new SegmentPloidy();
                ploidy.CopyNumber = 2;
                ploidy.MajorChromosomeCount = 1;
                point.Ploidy = ploidy;
                point.Cluster = i + 1;
                modelPoints.Add(point);
            }
            return modelPoints;
        }