Exemplo n.º 1
0
        /// <summary>
        /// Builds one <see cref="ModelPoint"/> for every entry of <c>AllPloidies</c> and stores the
        /// resulting coverage and MAF back on each <see cref="SegmentPloidy"/> as its mixed values.
        /// </summary>
        /// <param name="model">Coverage model; only <c>DiploidCoverage</c> is read here.</param>
        /// <returns>The model points, in the same order as <c>AllPloidies</c>.</returns>
        protected List<ModelPoint> InitializeModelPoints(CoverageModel model)
        {
            List<ModelPoint> modelPoints = new List<ModelPoint>();

            // Projected mean coverage; looked up below by each ploidy's copy number.
            double[] mu = GetProjectedMeanCoverage(model.DiploidCoverage);

            // Update the parameters in each SegmentPloidy object and construct the matching model point.
            foreach (SegmentPloidy ploidy in this.AllPloidies)
            {
                double coverage = mu[ploidy.CopyNumber];

                // A NaN minor allele frequency is replaced by 0.
                double maf = ploidy.MinorAlleleFrequency;
                if (double.IsNaN(maf))
                {
                    maf = 0;
                }

                ModelPoint point = new ModelPoint();
                point.Coverage = coverage;
                point.MAF = maf;
                point.Ploidy = ploidy;
                point.CN = ploidy.CopyNumber;

                // Mirror the point's values onto the ploidy state.
                ploidy.MixedMinorAlleleFrequency = point.MAF;
                ploidy.MixedCoverage = point.Coverage;

                // Add only once the point is fully populated.
                modelPoints.Add(point);
            }

            return modelPoints;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads segments and variant frequencies, assigns a copy number to every segment,
        /// merges and scores the segments, and writes the coverage plot data and the segment output.
        /// </summary>
        /// <param name="variantFrequencyFile">Path handed to <c>CanvasIO.LoadVariantFrequencies</c>.</param>
        /// <param name="inFile">Segment file to read; its directory becomes <c>TempFolder</c>.</param>
        /// <param name="outFile">Path the segments are written to.</param>
        /// <param name="ploidyBedPath">Optional ploidy BED file; skipped when null or empty.</param>
        /// <param name="referenceFolder">Forwarded to the segment and coverage-plot writers.</param>
        /// <param name="sampleName">Forwarded to the segment writer.</param>
        /// <param name="truthDataPath">Optional known-copy-number file; when given, a report against it is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName,
            string truthDataPath)
        {
            // Optional truth data for the known-CN report produced further down.
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                this.CNOracle = new CopyNumberOracle();
                this.CNOracle.LoadKnownCN(truthDataPath);
            }

            this.Segments = CanvasSegment.ReadSegments(inFile);
            this.TempFolder = Path.GetDirectoryName(inFile);

            // Without segments there is nothing to call: write the (empty) output and stop.
            bool noSegments = this.Segments.Count == 0;
            if (noSegments)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, null, null);
                return 0;
            }

            PloidyInfo ploidy = string.IsNullOrEmpty(ploidyBedPath)
                ? null
                : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);

            // Load variant frequencies onto the segments.
            this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
            int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);

            // Create the models for the different copy number states.
            this.InitializePloidies();

            // Coverage statistics from the aggregated counts.
            float[] aggregatedCounts = AggregateCounts(ref this.Segments);
            DiploidCoverage = CanvasCommon.Utilities.Mean(aggregatedCounts);
            CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;

            // New coverage model.
            this.Model = new CoverageModel();
            Model.DiploidCoverage = DiploidCoverage;

            // With more than 100 segments each one is weighted by its length, otherwise by its bin count.
            bool weightByLength = this.Segments.Count > 100;
            List<SegmentInfo> segments = new List<SegmentInfo>();
            foreach (CanvasSegment segment in this.Segments)
            {
                // Frequencies above 0.5 are mirrored to 1 - frequency.
                List<double> foldedFrequencies = new List<double>();
                foreach (float frequency in segment.VariantFrequencies)
                {
                    if (frequency > 0.5)
                    {
                        foldedFrequencies.Add(1 - frequency);
                    }
                    else
                    {
                        foldedFrequencies.Add(frequency);
                    }
                }

                SegmentInfo info = new SegmentInfo();
                info.Segment = segment;

                // -1 marks a segment that has no variant frequencies.
                info.MAF = foldedFrequencies.Count > 0 ? CanvasCommon.Utilities.Median(foldedFrequencies) : -1;
                info.Coverage = CanvasCommon.Utilities.Median(segment.Counts);

                if (weightByLength)
                {
                    info.Weight = segment.End - segment.Begin;
                }
                else
                {
                    info.Weight = segment.BinCount;
                }

                segments.Add(info);
            }

            // Assign copy number and major chromosome count for each segment.
            bool useGaussianMixtureModel = false; // For now, this is set false, since we saw weird performance on chrY (CANV-115):
            if (!useGaussianMixtureModel)
            {
                AssignPloidyCallsDistance(Model, segments, medianVariantCoverage);
            }
            else
            {
                // Optimize model covariance; the returned likelihood is not needed here.
                FitGaussians(Model, segments);
                AssignPloidyCallsGaussianMixture();
            }

            // Merge neighboring segments that got the same copy number call, then score them.
            CanvasSegment.MergeSegments(ref this.Segments);
            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline);

            string coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);
            CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (this.CNOracle != null)
            {
                this.GenerateReportVersusKnownCN();
            }

            // The ploidy header line, when present, is passed along as an extra header.
            List<string> extraHeaders = new List<string>();
            if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, extraHeaders, ploidy);
            return 0;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Fits a Gaussian mixture model to the segments.
        /// The model points come from <c>InitializeModelPoints</c>; the fit itself is delegated to
        /// <c>GaussianMixtureModel.Fit</c>. When <paramref name="debugPath"/> is given, the per-ploidy
        /// parameters and the per-segment posterior probabilities are dumped there as tab-separated text.
        /// </summary>
        /// <param name="model">Coverage model used to initialize the model points.</param>
        /// <param name="segments">Segments (MAF, coverage) the mixture model is fitted to.</param>
        /// <param name="debugPath">Optional output file for the debug dump; no file is written when null.</param>
        /// <returns>The value returned by <c>GaussianMixtureModel.Fit</c>.</returns>
        private double FitGaussians(CoverageModel model, List<SegmentInfo> segments, string debugPath = null)
        {
            List<ModelPoint> points = InitializeModelPoints(model);

            GaussianMixtureModel mixture = new GaussianMixtureModel(points, segments, this.MeanCoverage, this.CoverageWeightingFactor, 0);
            double likelihood = mixture.Fit();

            // No debug dump requested: the fit result is all the caller gets.
            if (debugPath == null)
            {
                return likelihood;
            }

            using (StreamWriter writer = new StreamWriter(debugPath))
            {
                // First table: one row per model point with the parameters stored on its ploidy.
                writer.WriteLine("CN\tMajor Chr #\tMAF\tCoverage\tOmega\tMu0\tMu1\tSigma00\tSigma01\tSigma10\tSigma11");
                foreach (ModelPoint point in points)
                {
                    var state = point.Ploidy;
                    writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}",
                        state.CopyNumber, state.MajorChromosomeCount,
                        state.MixedMinorAlleleFrequency, state.MixedCoverage,
                        state.Omega, state.Mu[0], state.Mu[1],
                        state.Sigma[0][0], state.Sigma[0][1],
                        state.Sigma[1][0], state.Sigma[1][1]);
                }

                // Second table: one row per segment, one posterior column per model point.
                writer.WriteLine("");
                writer.WriteLine("MAF\tCoverage\tPosterior Probabilities");
                StringBuilder row = new StringBuilder();
                foreach (SegmentInfo info in segments)
                {
                    row.Length = 0;
                    row.AppendFormat("{0}\t{1}", info.MAF, info.Coverage);
                    foreach (ModelPoint point in points)
                    {
                        row.AppendFormat("\t{0}", info.PosteriorProbs[point]);
                    }
                    writer.WriteLine(row.ToString());
                }
            }

            return likelihood;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Assigns copy number, model distances and major chromosome count to every segment in
        /// <c>this.Segments</c> by choosing the ploidy state with the smallest squared distance to the
        /// segment's (median coverage, median MAF) point.
        /// </summary>
        /// <param name="model">Coverage model forwarded to <c>InitializeModelPoints</c>.</param>
        /// <param name="segments">Not used by this method.</param>
        /// <param name="medianVariantCoverage">Not used by this method.</param>
        /// <exception cref="InvalidOperationException">
        /// No ploidy state could be selected for a segment (no states available, or every distance was NaN).
        /// </exception>
        private void AssignPloidyCallsDistance(CoverageModel model, List<SegmentInfo> segments, int medianVariantCoverage)
        {
            // Called for its side effects only: it refreshes the mixed coverage / MAF on each ploidy state.
            InitializeModelPoints(model);

            foreach (CanvasSegment segment in this.Segments)
            {
                // Compute (MAF, Coverage) for this segment; frequencies above 0.5 are mirrored to 1 - VF.
                List<double> MAF = new List<double>();
                foreach (float VF in segment.VariantFrequencies) MAF.Add(VF > 0.5 ? 1 - VF : VF);

                // MAF only takes part in the distance when the segment carries enough variants:
                // at least 10, and at least the length-derived cutoff.
                int expectedSnpDensityCutoff = (segment.End - segment.Begin) / MedianHetSnpsDistance / 2;
                bool useMAF = MAF.Count >= Math.Max(10, expectedSnpDensityCutoff);

                double medianCoverage = CanvasCommon.Utilities.Median(segment.Counts);
                double medianMAF = -1;
                if (useMAF)
                {
                    medianMAF = Utilities.Median(MAF);
                }

                SegmentPloidy bestPloidy = null;
                double bestDistance = double.MaxValue;
                double secondBestDistance = double.MaxValue;

                foreach (SegmentPloidy ploidy in AllPloidies)
                {
                    double diff = (ploidy.MixedCoverage - medianCoverage) * CoverageWeightingFactor;
                    double distance = diff * diff;
                    if (useMAF)
                    {
                        diff = ploidy.MixedMinorAlleleFrequency - medianMAF;
                        distance += diff * diff;
                    }

                    // Track the best and the runner-up distance.
                    if (distance < bestDistance)
                    {
                        secondBestDistance = bestDistance;
                        bestDistance = distance;
                        bestPloidy = ploidy;
                    }
                    else if (distance < secondBestDistance)
                    {
                        secondBestDistance = distance;
                    }
                }

                // Fail with a clear message instead of a NullReferenceException below.
                if (bestPloidy == null)
                {
                    throw new InvalidOperationException(string.Format(
                        "No ploidy state could be selected for segment {0}-{1}.", segment.Begin, segment.End));
                }

                segment.CopyNumber = bestPloidy.CopyNumber;
                segment.ModelDistance = bestDistance;
                segment.RunnerUpModelDistance = secondBestDistance;

                segment.MajorChromosomeCount = bestPloidy.MajorChromosomeCount;
                // NOTE(review): this threshold is a fixed 10, while the distance above only uses MAF when the
                // count also reaches expectedSnpDensityCutoff - confirm whether the two should agree.
                if (MAF.Count < 10) segment.MajorChromosomeCount = null; // Don't assign MCC if we don't have variant allele frequencies
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Assigns copy number, model distances and major chromosome count to every segment in
        /// <c>this.Segments</c> by choosing the ploidy state with the smallest squared distance to the
        /// segment's (median coverage, weighted median MAF) point. Variants whose total coverage is
        /// below <paramref name="medianVariantCoverage"/> get a proportionally smaller weight.
        /// </summary>
        /// <param name="model">Coverage model forwarded to <c>InitializeModelPoints</c>.</param>
        /// <param name="segments">Not used by this method.</param>
        /// <param name="medianVariantCoverage">Coverage at or above which a variant gets full weight (1).</param>
        /// <exception cref="InvalidOperationException">
        /// No ploidy state could be selected for a segment (no states available, or every distance was NaN).
        /// </exception>
        private void AssignPloidyCallsDistance(CoverageModel model, List<SegmentInfo> segments, int medianVariantCoverage)
        {
            // Called for its side effects only: it refreshes the mixed coverage / MAF on each ploidy state.
            InitializeModelPoints(model);

            foreach (CanvasSegment segment in this.Segments)
            {
                // Compute (MAF, Coverage) for this segment; frequencies above 0.5 are mirrored to 1 - VF.
                List<double> MAF = new List<double>();
                foreach (float VF in segment.VariantFrequencies) MAF.Add(VF > 0.5 ? 1 - VF : VF);

                double medianCoverage = CanvasCommon.Utilities.Median(segment.Counts);

                List<Tuple<float, float>> weightedVariantFrequencies = new List<Tuple<float, float>>();
                for (int i = 0; i < MAF.Count; i++)
                {
                    // For now penalize only low-coverage variants; everything else gets full weight.
                    float variantWeight = 1;
                    if (segment.VariantTotalCoverage[i] < medianVariantCoverage)
                    {
                        // Force floating-point division: an integer quotient would always truncate to 0 here.
                        variantWeight = Convert.ToSingle((double)segment.VariantTotalCoverage[i] / medianVariantCoverage);
                    }

                    weightedVariantFrequencies.Add(Tuple.Create(Convert.ToSingle(MAF[i]), variantWeight));
                }

                // MAF only takes part in the distance when the segment carries at least 10 variants.
                bool useMAF = MAF.Count >= 10;
                double medianMAF = -1;
                if (useMAF)
                {
                    medianMAF = Utilities.WeightedMedian(weightedVariantFrequencies);
                }

                SegmentPloidy bestPloidy = null;
                double bestDistance = double.MaxValue;
                double secondBestDistance = double.MaxValue;

                foreach (SegmentPloidy ploidy in AllPloidies)
                {
                    double diff = (ploidy.MixedCoverage - medianCoverage) * CoverageWeightingFactor;
                    double distance = diff * diff;
                    if (useMAF)
                    {
                        diff = ploidy.MixedMinorAlleleFrequency - medianMAF;
                        distance += diff * diff;
                    }

                    // Track the best and the runner-up distance.
                    if (distance < bestDistance)
                    {
                        secondBestDistance = bestDistance;
                        bestDistance = distance;
                        bestPloidy = ploidy;
                    }
                    else if (distance < secondBestDistance)
                    {
                        secondBestDistance = distance;
                    }
                }

                // Fail with a clear message instead of a NullReferenceException below.
                if (bestPloidy == null)
                {
                    throw new InvalidOperationException(string.Format(
                        "No ploidy state could be selected for segment {0}-{1}.", segment.Begin, segment.End));
                }

                segment.CopyNumber = bestPloidy.CopyNumber;
                segment.ModelDistance = bestDistance;
                segment.RunnerUpModelDistance = secondBestDistance;

                segment.MajorChromosomeCount = bestPloidy.MajorChromosomeCount;
                if (!useMAF) segment.MajorChromosomeCount = null; // Don't assign MCC if we don't have variant allele frequencies
            }
        }