Esempio n. 1
0
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyVcfPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                _cnOracle = new CopyNumberOracle();
                _cnOracle.LoadKnownCN(truthDataPath);
            }

            _segments    = Segments.ReadSegments(_logger, new FileLocation(inFile));
            _allSegments = _segments.AllSegments.ToList();
            TempFolder   = Path.GetDirectoryName(inFile);
            if (_allSegments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegmentWriter.WriteSegments(outFile, _allSegments, _model?.DiploidCoverage, referenceFolder,
                                                  sampleName, null, null, QualityFilterThreshold, false, null, null);
                return(0);
            }
            PloidyInfo ploidy = null;

            if (!string.IsNullOrEmpty(ploidyVcfPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath);
            }

            // load MAF
            var allelesByChromosome = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFile), _segments.IntervalsByChromosome);

            _segments.AddAlleles(allelesByChromosome);
            MeanCoverage = allelesByChromosome.SelectMany(x => x.Value).SelectMany(y => y.TotalCoverage).Average();
            AggregateVariantCoverage(ref _allSegments);

            // Create new models for different copy number states
            InitializePloidies();

            // Compute statistics on the copy number two regions
            float[] diploidCounts = AggregateCounts(ref _allSegments);
            _diploidCoverage         = Utilities.Mean(diploidCounts);
            _coverageWeightingFactor = CoverageWeighting / _diploidCoverage;
            // new coverage model
            _model = new CoverageModel {
                DiploidCoverage = _diploidCoverage
            };
            List <SegmentInfo> segments = new List <SegmentInfo>();

            foreach (CanvasSegment segment in _allSegments)
            {
                SegmentInfo info = new SegmentInfo {
                    Segment = segment
                };
                List <double> mafs = new List <double>();
                foreach (float value in segment.Balleles.Frequencies)
                {
                    mafs.Add(value > 0.5 ? 1 - value : value);
                }

                if (mafs.Count > 0)
                {
                    info.Maf = Utilities.Median(mafs);
                }
                else
                {
                    info.Maf = -1;
                }

                info.Coverage = Utilities.Median(segment.Counts);

                info.Weight = _allSegments.Count > 100 ? segment.Length : segment.BinCount;
                segments.Add(info);
            }

            AssignPloidyCallsDistance(_model);

            CanvasSegment.AssignQualityScores(_allSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);

            // Merge neighboring segments that got the same copy number call.
            // merging segments requires quality scores so we do it after quality scores have been assigned
            var mergedSegments = CanvasSegment.MergeSegments(_allSegments);

            // recalculating qscores after merging segments improves performance!

            CanvasSegment.AssignQualityScores(mergedSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);
            CanvasSegment.SetFilterForSegments(QualityFilterThreshold, mergedSegments, CanvasFilter.SegmentSizeCutoff);

            List <string> extraHeaders       = new List <string>();
            var           coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);

            CanvasSegment.WriteCoveragePlotData(mergedSegments, _model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (_cnOracle != null)
            {
                GenerateReportVersusKnownCopyNumber();
            }

            if (!string.IsNullOrEmpty(ploidy?.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegmentWriter.WriteSegments(outFile, mergedSegments, _model.DiploidCoverage, referenceFolder, sampleName,
                                              extraHeaders, ploidy, QualityFilterThreshold, false, null, null);
            return(0);
        }
Esempio n. 2
0
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                this.CNOracle = new CopyNumberOracle();
                this.CNOracle.LoadKnownCN(truthDataPath);
            }

            this.Segments   = CanvasSegment.ReadSegments(inFile);
            this.TempFolder = Path.GetDirectoryName(inFile);
            if (this.Segments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegmentWriter.WriteSegments(outFile, this.Segments, Model?.DiploidCoverage, referenceFolder,
                                                  sampleName, null, null, QualityFilterThreshold, isPedigreeInfoSupplied: false);
                return(0);
            }
            PloidyInfo ploidy = null;

            if (!string.IsNullOrEmpty(ploidyBedPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
            }

            // load MAF
            this.MeanCoverage = CanvasIO.LoadFrequencies(variantFrequencyFile, this.Segments);
            int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);


            // Create new models for different copy number states
            this.InitializePloidies();

            // Compute statistics on the copy number two regions
            float[] diploidCounts = AggregateCounts(ref this.Segments);
            DiploidCoverage         = CanvasCommon.Utilities.Mean(diploidCounts);
            CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;
            // new coverage model
            this.Model            = new CoverageModel();
            Model.DiploidCoverage = DiploidCoverage;
            List <SegmentInfo> segments = new List <SegmentInfo>();

            foreach (CanvasSegment segment in this.Segments)
            {
                SegmentInfo info = new SegmentInfo();
                info.Segment = segment;
                List <double> MAF = new List <double>();
                foreach (float value in segment.Alleles.Frequencies)
                {
                    MAF.Add(value > 0.5 ? 1 - value : value);
                }

                if (MAF.Count > 0)
                {
                    info.MAF = CanvasCommon.Utilities.Median(MAF);
                }
                else
                {
                    info.MAF = -1;
                }

                info.Coverage = CanvasCommon.Utilities.Median(segment.Counts);

                if (this.Segments.Count > 100)
                {
                    info.Weight = segment.End - segment.Begin;
                }
                else
                {
                    info.Weight = segment.BinCount;
                }
                segments.Add(info);
            }

            // Assign copy number and major chromosome count for each segment
            bool useGaussianMixtureModel = false; // For now, this is set false, since we saw weird performance on chrY (CANV-115):

            if (useGaussianMixtureModel)
            {
                // optimize model covariance
                double likelihood = FitGaussians(Model, segments);
                AssignPloidyCallsGaussianMixture();
            }
            else
            {
                AssignPloidyCallsDistance(Model, segments, medianVariantCoverage);
            }

            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline, germlineScoreParameters);

            // Merge neighboring segments that got the same copy number call.
            // merging segments requires quality scores so we do it after quality scores have been assigned
            CanvasSegment.MergeSegments(ref this.Segments);
            // recalculating qscores after merging segments improves performance!
            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline, germlineScoreParameters);
            CanvasSegment.FilterSegments(QualityFilterThreshold, Segments);

            List <string> extraHeaders       = new List <string>();
            string        coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);

            CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (this.CNOracle != null)
            {
                this.GenerateReportVersusKnownCN();
            }

            if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegmentWriter.WriteSegments(outFile, this.Segments, Model.DiploidCoverage, referenceFolder, sampleName,
                                              extraHeaders, ploidy, QualityFilterThreshold, isPedigreeInfoSupplied: false);
            return(0);
        }