Ejemplo n.º 1
0
 /// <summary>
 /// Setup: builds the list of candidate ploidy models, one per (copy number, major chromosome count)
 /// pair, for copy numbers 0 through MaximumCopyNumber and major counts from the copy number down to
 /// half of it.
 /// </summary>
 /// <remarks>
 /// Each model's Index is its position in the list. Copy number 0 gets a fixed minor allele frequency
 /// of 0.01. Balanced models (major count exactly half the copy number) take their frequency from
 /// Utilities.EstimateDiploidMAF; every other model uses the folded ratio major count / copy number.
 /// </remarks>
 public void InitializePloidies()
 {
     Console.WriteLine("{0} Initialize ploidy models...", DateTime.Now);
     _allPloidies = new List<SegmentPloidy>();

     // NOTE(review): the result of this call is discarded; it is kept here in case the helper has
     // side effects -- confirm and remove if it is pure.
     Utilities.EstimateDiploidMAF(2, MeanCoverage);

     for (int copies = 0; copies <= MaximumCopyNumber; copies++)
     {
         // Walk the major count down from "every copy on one chromosome" to the balanced split.
         for (int major = copies; copies <= 2 * major; major--)
         {
             SegmentPloidy model = new SegmentPloidy
             {
                 CopyNumber = copies,
                 MajorChromosomeCount = major,
                 Index = _allPloidies.Count
             };
             _allPloidies.Add(model);

             if (copies == 0)
             {
                 model.MinorAlleleFrequency = 0.01; // should reflect sequencing error rate
                 continue;
             }

             // Fold the major-allele fraction into the [0, 0.5] range.
             float majorFraction = major / (float)copies;
             if (majorFraction < 0.5)
             {
                 model.MinorAlleleFrequency = majorFraction;
             }
             else
             {
                 model.MinorAlleleFrequency = 1 - majorFraction;
             }

             // Balanced models replace the folded ratio with the estimate for this copy number.
             bool isBalanced = major * 2 == copies;
             if (isBalanced)
             {
                 model.MinorAlleleFrequency = Utilities.EstimateDiploidMAF(copies, MeanCoverage);
             }
         }
     }

     Console.WriteLine("{0} Ploidy models prepared.", DateTime.Now);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Setup: fills AllPloidies with one model per (copy number, major chromosome count) pair, for
        /// copy numbers 0 through MaximumCopyNumber and major counts from the copy number down to half
        /// of it.
        /// </summary>
        /// <remarks>
        /// Each model's ID is its position in AllPloidies. Copy number 0 gets a fixed minor allele
        /// frequency of 0.01. Balanced models (major count exactly half the copy number) take their
        /// frequency from CanvasCommon.Utilities.EstimateDiploidMAF; all others use the folded ratio
        /// major count / copy number.
        /// </remarks>
        public void InitializePloidies()
        {
            Console.WriteLine("{0} Initialize ploidy models...", DateTime.Now);
            this.AllPloidies = new List<SegmentPloidy>();

            // NOTE(review): the value returned here is never read in this method; the call is kept in
            // case the helper has side effects -- confirm and remove if it is pure.
            CanvasCommon.Utilities.EstimateDiploidMAF(2, this.MeanCoverage);

            for (int copies = 0; copies <= MaximumCopyNumber; copies++)
            {
                // Walk the major count down from "every copy on one chromosome" to the balanced split.
                for (int major = copies; copies <= 2 * major; major--)
                {
                    SegmentPloidy model = new SegmentPloidy
                    {
                        CopyNumber = copies,
                        MajorChromosomeCount = major,
                        ID = AllPloidies.Count
                    };
                    AllPloidies.Add(model);

                    if (copies == 0)
                    {
                        model.MinorAlleleFrequency = 0.01; // should reflect sequencing error rate
                        continue;
                    }

                    // Fold the major-allele fraction into the [0, 0.5] range.
                    float majorFraction = major / (float)copies;
                    if (majorFraction < 0.5)
                    {
                        model.MinorAlleleFrequency = majorFraction;
                    }
                    else
                    {
                        model.MinorAlleleFrequency = 1 - majorFraction;
                    }

                    // Balanced models replace the folded ratio with the estimate for this copy number.
                    bool isBalanced = major * 2 == copies;
                    if (isBalanced)
                    {
                        model.MinorAlleleFrequency = CanvasCommon.Utilities.EstimateDiploidMAF(copies, this.MeanCoverage);
                    }
                }
            }

            Console.WriteLine("{0} Ploidy models prepared.", DateTime.Now);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Assigns a copy number and major chromosome count to each segment by picking the ploidy model
        /// with the highest posterior probability from GaussianMixtureModel.EMComputePosteriorProbs.
        /// </summary>
        /// <remarks>
        /// Segments with fewer than 10 variant frequencies are scored with a placeholder median MAF of -1
        /// and never receive a major chromosome count. When no model scores above zero, the copy number
        /// is derived from coverage alone.
        /// </remarks>
        void AssignPloidyCallsGaussianMixture()
        {
            // Placeholder handed to the mixture model for segments with (almost) no variant alleles;
            // presumably the model ignores MAF for this sentinel -- confirm against EMComputePosteriorProbs.
            const double placeholderMaf = -1;

            foreach (CanvasSegment segment in this.Segments)
            {
                // Fold each variant frequency into a minor allele frequency (at most 0.5).
                List<double> foldedFrequencies = new List<double>();
                foreach (float frequency in segment.VariantFrequencies)
                {
                    if (frequency > 0.5)
                    {
                        foldedFrequencies.Add(1 - frequency);
                    }
                    else
                    {
                        foldedFrequencies.Add(frequency);
                    }
                }

                double medianCoverage = CanvasCommon.Utilities.Median(segment.Counts);

                // Only trust the median MAF when there are at least 10 observations.
                bool hasEnoughVariants = foldedFrequencies.Count >= 10;
                double medianMaf = placeholderMaf;
                if (hasEnoughVariants)
                {
                    medianMaf = Utilities.Median(foldedFrequencies);
                }

                Dictionary<SegmentPloidy, double> posteriors = GaussianMixtureModel.EMComputePosteriorProbs(AllPloidies, medianMaf, medianCoverage);

                // Pick the most probable model; the first model always seeds the search.
                SegmentPloidy winner = null;
                double winnerProbability = 0;
                foreach (SegmentPloidy candidate in AllPloidies)
                {
                    double probability = posteriors[candidate];
                    if (winner == null || probability > winnerProbability)
                    {
                        winnerProbability = probability;
                        winner = candidate;
                    }
                }

                if (winnerProbability == 0)
                {
                    // Sanity check: nothing scored above zero, so fall back to the simplest possible
                    // call, based purely on coverage.
                    segment.CopyNumber = (int)Math.Round(2 * medianCoverage / this.DiploidCoverage);
                    segment.MajorChromosomeCount = null;
                    continue;
                }

                segment.CopyNumber = winner.CopyNumber;
                segment.MajorChromosomeCount = winner.MajorChromosomeCount;
                if (!hasEnoughVariants)
                {
                    // Don't assign MCC if we don't have variant allele frequencies.
                    segment.MajorChromosomeCount = null;
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Assigns a copy number and major chromosome count to each segment by choosing the ploidy model
        /// closest to the segment's (median MAF, median coverage) point, and records the best and
        /// runner-up squared distances on the segment.
        /// </summary>
        /// <param name="model">Coverage model forwarded to InitializeModelPoints.</param>
        private void AssignPloidyCallsDistance(CoverageModel model)
        {
            InitializeModelPoints(model);

            foreach (CanvasSegment segment in _allSegments)
            {
                // Fold each B-allele frequency into a minor allele frequency (at most 0.5).
                List<double> foldedFrequencies = new List<double>();
                foreach (float frequency in segment.Balleles.Frequencies)
                {
                    if (frequency > 0.5)
                    {
                        foldedFrequencies.Add(1 - frequency);
                    }
                    else
                    {
                        foldedFrequencies.Add(frequency);
                    }
                }

                int expectedSnpDensityCutoff = (segment.Length) / MedianHetSnpsDistance / 2;
                double medianCoverage = Utilities.Median(segment.Counts);

                // MAF contributes to the distance only when the segment has enough observations:
                // at least 10, and at least the length-derived cutoff.
                bool useMaf = foldedFrequencies.Count >= Math.Max(10, expectedSnpDensityCutoff);
                double medianMaf = -1;
                if (useMaf)
                {
                    medianMaf = Utilities.Median(foldedFrequencies);
                }

                SegmentPloidy closest = null;
                double closestDistance = double.MaxValue;
                double runnerUpDistance = double.MaxValue;

                foreach (SegmentPloidy candidate in _allPloidies)
                {
                    // Squared, weighted coverage offset, plus the squared MAF offset when applicable.
                    double coverageDelta = (candidate.MixedCoverage - medianCoverage) * _coverageWeightingFactor;
                    double distance = coverageDelta * coverageDelta;
                    if (useMaf)
                    {
                        double mafDelta = candidate.MixedMinorAlleleFrequency - medianMaf;
                        distance += mafDelta * mafDelta;
                    }

                    if (distance < closestDistance)
                    {
                        // New best: the previous best becomes the runner-up.
                        runnerUpDistance = closestDistance;
                        closestDistance = distance;
                        closest = candidate;
                    }
                    else if (distance < runnerUpDistance)
                    {
                        runnerUpDistance = distance;
                    }
                }

                segment.ModelDistance = closestDistance;
                segment.RunnerUpModelDistance = runnerUpDistance;

                if (closest != null)
                {
                    segment.CopyNumber = closest.CopyNumber;
                    segment.MajorChromosomeCount = closest.MajorChromosomeCount;
                }

                // NOTE(review): this threshold is a flat 10, while the distance above uses
                // Math.Max(10, expectedSnpDensityCutoff) -- confirm the difference is intended.
                if (foldedFrequencies.Count < 10)
                {
                    // Don't assign MCC if we don't have variant allele frequencies.
                    segment.MajorChromosomeCount = null;
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Assigns a copy number and major chromosome count to each segment by choosing the ploidy model
        /// closest to the segment's (median MAF, median coverage) point, and records the best and
        /// runner-up squared distances on the segment.
        /// </summary>
        /// <param name="model">Coverage model forwarded to InitializeModelPoints.</param>
        /// <param name="segments">Not read by this method.</param>
        /// <param name="medianVariantCoverage">Not read by this method.</param>
        /// <remarks>
        /// If no ploidy model yields a distance below double.MaxValue (for example when AllPloidies is
        /// empty or every distance is NaN), the segment's copy number is left untouched and
        /// ModelDistance stays at double.MaxValue instead of throwing a NullReferenceException.
        /// </remarks>
        private void AssignPloidyCallsDistance(CoverageModel model, List<SegmentInfo> segments, int medianVariantCoverage)
        {
            // The returned model points are not used here. NOTE(review): the call is kept in case it
            // initializes state the distance computation depends on -- confirm.
            InitializeModelPoints(model);

            foreach (CanvasSegment segment in this.Segments)
            {
                // Fold each variant frequency into a minor allele frequency (at most 0.5).
                List<double> MAF = new List<double>();
                foreach (float VF in segment.VariantFrequencies)
                {
                    MAF.Add(VF > 0.5 ? 1 - VF : VF);
                }
                int expectedSnpDensityCutoff = (segment.End - segment.Begin) / MedianHetSnpsDistance / 2;

                double medianCoverage = CanvasCommon.Utilities.Median(segment.Counts);

                // MAF contributes to the distance only when the segment has enough observations:
                // at least 10, and at least the length-derived cutoff. Computed once per segment
                // rather than once per candidate ploidy.
                bool useMAF = MAF.Count >= Math.Max(10, expectedSnpDensityCutoff);
                double medianMAF = -1;
                if (useMAF)
                {
                    medianMAF = Utilities.Median(MAF);
                }

                SegmentPloidy bestPloidy = null;
                double bestDistance = double.MaxValue;
                double secondBestDistance = double.MaxValue;

                foreach (SegmentPloidy ploidy in AllPloidies)
                {
                    // Squared, weighted coverage offset, plus the squared MAF offset when applicable.
                    double diff = (ploidy.MixedCoverage - medianCoverage) * CoverageWeightingFactor;
                    double distance = diff * diff;
                    if (useMAF)
                    {
                        diff = ploidy.MixedMinorAlleleFrequency - medianMAF;
                        distance += diff * diff;
                    }
                    if (distance < bestDistance)
                    {
                        // New best: the previous best becomes the runner-up.
                        secondBestDistance = bestDistance;
                        bestDistance = distance;
                        bestPloidy = ploidy;
                    }
                    else if (distance < secondBestDistance)
                    {
                        secondBestDistance = distance;
                    }
                }

                // Guard: bestPloidy stays null when no candidate compared below double.MaxValue.
                if (bestPloidy != null)
                {
                    segment.CopyNumber = bestPloidy.CopyNumber;
                    segment.MajorChromosomeCount = bestPloidy.MajorChromosomeCount;
                }
                segment.ModelDistance = bestDistance;
                segment.RunnerUpModelDistance = secondBestDistance;

                if (MAF.Count < 10)
                {
                    // Don't assign MCC if we don't have variant allele frequencies.
                    segment.MajorChromosomeCount = null;
                }
            }
        }