A SegmentPloidy represents a potential copy number state of a genomic interval. It has a copy number and a major chromosome count.
Beispiel #1
0
        /// <summary>
        /// Setup: Model various copy ploidies.
        /// Rebuilds AllPloidies with one SegmentPloidy per (copy number, major chromosome count) pair.
        /// </summary>
        /// <remarks>
        /// Copy numbers run from 0 through MaximumCopyNumber. For each copy number, the major chromosome
        /// count runs from the copy number down to the smallest value that is still at least half of it.
        /// Each model's ID is its index in AllPloidies. Start and finish messages are written to the console.
        /// </remarks>
        public void InitializePloidies()
        {
            Console.WriteLine("{0} Initialize ploidy models...", DateTime.Now);
            this.AllPloidies = new List<SegmentPloidy>();
            for (int copyNumber = 0; copyNumber <= MaximumCopyNumber; copyNumber++)
            {
                for (int majorCount = copyNumber; majorCount * 2 >= copyNumber; majorCount--)
                {
                    SegmentPloidy ploidy = new SegmentPloidy();
                    ploidy.CopyNumber = copyNumber;
                    ploidy.MajorChromosomeCount = majorCount;
                    ploidy.ID = AllPloidies.Count;

                    if (copyNumber == 0)
                    {
                        ploidy.MinorAlleleFrequency = 0.01; // should reflect sequencing error rate
                    }
                    else if (majorCount * 2 == copyNumber)
                    {
                        // Balanced state: take the MAF from the coverage-aware estimator instead of
                        // using the nominal fraction of exactly one half.
                        ploidy.MinorAlleleFrequency = CanvasCommon.Utilities.EstimateDiploidMAF(copyNumber, this.MeanCoverage);
                    }
                    else
                    {
                        // The loop bound guarantees majorCount is more than half of copyNumber here, so the
                        // minor allele is carried by the remaining (copyNumber - majorCount) chromosomes.
                        // Divide in double precision so the stored fraction is not rounded through float.
                        ploidy.MinorAlleleFrequency = (copyNumber - majorCount) / (double)copyNumber;
                    }

                    AllPloidies.Add(ploidy);
                }
            }
            Console.WriteLine("{0} Ploidy models prepared.", DateTime.Now);
        }
Beispiel #2
0
        /// <summary>
        ///  Initialize model points by subsampling from existing segment Coverage and MAF values.
        ///  Use distanceThreshold to ensure that both large and small cluster components get subsampled.
        /// </summary>
        /// <param name="segments">Candidate segments. Only segments whose Cluster is not -1 and whose MAF is non-negative are sampled.</param>
        /// <param name="numClusters">Number of model points to create.</param>
        /// <param name="distanceThreshold">
        /// A randomly drawn segment is accepted when its GetModelDistance from the previously accepted
        /// segment exceeds this value, or when too many consecutive draws have been rejected.
        /// </param>
        /// <returns>
        /// A list of numClusters model points with Cluster numbered from 1, each carrying the Coverage and MAF
        /// of a sampled segment and a placeholder ploidy (CopyNumber 2, MajorChromosomeCount 1).
        /// The list is empty when numClusters is zero or negative.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// numClusters is positive but no segment passes the usability filter.
        /// </exception>
        protected List<ModelPoint> InitializeModelPoints(List<SegmentInfo> segments, int numClusters, double distanceThreshold)
        {
            List<ModelPoint> modelPoints = new List<ModelPoint>();
            if (numClusters <= 0)
            {
                return modelPoints;
            }

            List<SegmentInfo> usableSegments = new List<SegmentInfo>();
            foreach (SegmentInfo segment in segments)
            {
                if (segment.Cluster != -1 && segment.MAF >= 0)
                {
                    usableSegments.Add(segment);
                }
            }

            if (usableSegments.Count == 0)
            {
                // Same exception type that the random draw would raise on an empty range, but with a message
                // that names the real cause.
                throw new ArgumentOutOfRangeException("segments", "No usable segments (Cluster != -1 and MAF >= 0) are available to initialize model points.");
            }

            Random rnd = new Random();

            // Random.Next(maxValue) draws from [0, maxValue), so every usable segment - including the
            // first one - can be selected, and a single usable segment is handled without an index error.
            SegmentInfo lastSegment = usableSegments[rnd.Next(usableSegments.Count)];
            List<SegmentInfo> usedSegments = new List<SegmentInfo>();
            usedSegments.Add(lastSegment);

            int attempts = 0;
            while (usedSegments.Count < numClusters)
            {
                SegmentInfo candidate = usableSegments[rnd.Next(usableSegments.Count)];
                attempts++;
                double distance = GetModelDistance(lastSegment.Coverage, candidate.Coverage, lastSegment.MAF, candidate.MAF);

                // Accept a sufficiently distant candidate. Also accept any candidate once the number of
                // consecutive rejections exceeds 30% of the usable segment count, to escape outlier minima.
                bool tooManyAttempts = (double)attempts / usableSegments.Count > 0.3;
                if (distance > distanceThreshold || tooManyAttempts)
                {
                    usedSegments.Add(candidate);
                    lastSegment = candidate;
                    attempts = 0;
                }
            }

            // Initialize model points with coverage and MAF values from subsampled segments
            for (int i = 0; i < numClusters; i++)
            {
                SegmentPloidy ploidy = new SegmentPloidy();
                ploidy.CopyNumber = 2;
                ploidy.MajorChromosomeCount = 1;

                ModelPoint point = new ModelPoint();
                point.Coverage = usedSegments[i].Coverage;
                point.MAF = usedSegments[i].MAF;
                point.Ploidy = ploidy;
                point.Cluster = i + 1;
                modelPoints.Add(point);
            }
            return modelPoints;
        }