Пример #1
0
        /// <summary>
        /// Merges the segments of every sample in <paramref name="segments"/>. For each segment position the
        /// merge is driven by the copy numbers of all samples and by the QScore averaged over all samples.
        /// </summary>
        /// <param name="segments">Per-sample segment lists. The length of the first sample's list defines how
        /// many segment positions are read from every sample.</param>
        /// <param name="minimumCallSize">Passed through unchanged to <c>CanvasSegment.MergeSegments</c>.</param>
        /// <param name="qScoreThreshold">Passed through unchanged to <c>CanvasSegment.MergeSegments</c>.</param>
        /// <returns>A new <c>SampleMap</c> holding one merged segment list per input sample.</returns>
        private static ISampleMap <List <CanvasSegment> > MergeSegments(ISampleMap <List <CanvasSegment> > segments, int minimumCallSize, int qScoreThreshold)
        {
            int nSegments   = segments.First().Value.Count;
            var copyNumbers = new List <List <int> >(nSegments);
            var qscores     = new List <double>(nSegments);

            // For every segment position collect each sample's copy number and the mean QScore across samples.
            foreach (int segmentIndex in Enumerable.Range(0, nSegments))
            {
                copyNumbers.Add(segments.Select(s => s.Value[segmentIndex].CopyNumber).ToList());
                qscores.Add(segments.Select(s => s.Value[segmentIndex].QScore).Average());
            }

            // Both lists are created above and filled with exactly nSegments entries, so they can be neither
            // null nor of a different length at this point; the former null/length checks could never fire.

            var mergedSegments = new SampleMap <List <CanvasSegment> >();

            foreach (var sampleSegments in segments)
            {
                // ToList() hands a fresh list to the merge routine instead of the caller's own list.
                // NOTE(review): 10000 is a hard-coded third argument; its meaning is defined by
                // CanvasSegment.MergeSegments and is not visible here - confirm before changing.
                var mergedSegmentsThisSample = CanvasSegment.MergeSegments(sampleSegments.Value.ToList(),
                                                                           minimumCallSize, 10000, copyNumbers, qscores, qScoreThreshold);
                mergedSegments.Add(sampleSegments.Key, mergedSegmentsThisSample);
            }
            return(mergedSegments);
        }
Пример #2
0
        public void TestMergeSegments()
        {
            // Builds a fixed set of segments on three chromosomes, runs CanvasSegment.MergeSegments over
            // them, and checks that each chromosome ends up with the expected number of segments.
            var counts = new List<SampleGenomicBin>();

            var inputSegments = new List<CanvasSegment>
            {
                // chr1: five adjacent segments, expected to collapse into three.
                new CanvasSegment("chr1", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 2000000, 2000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 2000100, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 3000000, 3100000, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 3100000, 4000000, counts) { CopyNumber = 2 },

                // chr2: segments separated by large gaps, so none of them is expected to merge.
                new CanvasSegment("chr2", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr2", 3000000, 3000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr2", 4000000, 5000000, counts) { CopyNumber = 2 },

                // chr3: three adjacent segments with the same copy number, expected to become one.
                new CanvasSegment("chr3", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 2000000, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 3000000, 4000000, counts) { CopyNumber = 2 },
            };

            var merged      = CanvasSegment.MergeSegments(inputSegments, 50000, 10000);
            var perChromosome = CanvasSegment.GetSegmentsByChromosome(merged);

            Assert.Equal(3, perChromosome["chr1"].Count);
            Assert.Equal(3, perChromosome["chr2"].Count);
            Assert.Single(perChromosome["chr3"]);
        }
Пример #3
0
        public void TestMergeSegments()
        {
            // Construct several segments, invoke CanvasSegment.MergeSegments (which replaces the list through
            // the ref argument), and ensure that the expected merges (and no others) occurred.
            List <float> counts = new List <float>();
            List <CanvasSegment> allSegments = new List <CanvasSegment>
            {
                // Chr1 gets five segments and we should merge to three:
                new CanvasSegment("chr1", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 2000000, 2000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 2000100, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr1", 3000000, 3100000, counts) { CopyNumber = 3 },
                new CanvasSegment("chr1", 3100000, 4000000, counts) { CopyNumber = 2 },

                // Chr2 gets segments with a large gap between, so can't merge:
                new CanvasSegment("chr2", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr2", 3000000, 3000100, counts) { CopyNumber = 3 },
                new CanvasSegment("chr2", 4000000, 5000000, counts) { CopyNumber = 2 },

                // Chr3 has three segments that all merge to 1 big one:
                new CanvasSegment("chr3", 1000000, 2000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 2000000, 3000000, counts) { CopyNumber = 2 },
                new CanvasSegment("chr3", 3000000, 4000000, counts) { CopyNumber = 2 },
            };

            CanvasSegment.MergeSegments(ref allSegments, 50000, 10000);
            Dictionary <string, List <CanvasSegment> > segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(allSegments);

            // Assert.AreEqual takes (expected, actual); passing them in that order keeps failure messages truthful.
            Assert.AreEqual(3, segmentsByChromosome["chr1"].Count);
            Assert.AreEqual(3, segmentsByChromosome["chr2"].Count);
            Assert.AreEqual(1, segmentsByChromosome["chr3"].Count);
        }
Пример #4
0
        /// <summary>
        /// Reads segments and variant frequencies, assigns copy-number calls and quality scores, merges
        /// neighbouring segments, and writes the coverage plot data and the segment output file.
        /// </summary>
        /// <param name="variantFrequencyFile">File handed to <c>CanvasIO.ReadFrequenciesWrapper</c>.</param>
        /// <param name="inFile">Segment file handed to <c>Segments.ReadSegments</c>; its directory becomes <c>TempFolder</c>.</param>
        /// <param name="outFile">Path handed to <c>CanvasSegmentWriter.WriteSegments</c>; also used to derive the coverage output path.</param>
        /// <param name="ploidyVcfPath">Optional; when non-empty, ploidy is loaded from it.</param>
        /// <param name="referenceFolder">Forwarded to the writers.</param>
        /// <param name="sampleName">Forwarded to <c>CanvasSegmentWriter.WriteSegments</c>.</param>
        /// <param name="truthDataPath">Optional; when non-empty, known copy numbers are loaded and a report is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyVcfPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            // Optional truth data: load it up front so the report can be produced later.
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                _cnOracle = new CopyNumberOracle();
                _cnOracle.LoadKnownCN(truthDataPath);
            }

            _segments = Segments.ReadSegments(_logger, new FileLocation(inFile));
            _allSegments = _segments.AllSegments.ToList();
            TempFolder = Path.GetDirectoryName(inFile);

            // Nothing to call: still emit an output file, then stop.
            if (_allSegments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegmentWriter.WriteSegments(outFile, _allSegments, _model?.DiploidCoverage, referenceFolder,
                                                  sampleName, null, null, QualityFilterThreshold, false, null, null);
                return 0;
            }

            PloidyInfo ploidy = string.IsNullOrEmpty(ploidyVcfPath)
                ? null
                : PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath);

            // Load allele frequencies and attach them to the segments.
            var allelesByChromosome = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFile), _segments.IntervalsByChromosome);
            _segments.AddAlleles(allelesByChromosome);

            var totalCoverages = allelesByChromosome
                                 .SelectMany(chromosomeAlleles => chromosomeAlleles.Value)
                                 .SelectMany(alleles => alleles.TotalCoverage);
            MeanCoverage = totalCoverages.Average();
            AggregateVariantCoverage(ref _allSegments);

            // Set up the models for the different copy number states.
            InitializePloidies();

            // Statistics over the counts returned by AggregateCounts.
            float[] diploidCounts = AggregateCounts(ref _allSegments);
            _diploidCoverage = Utilities.Mean(diploidCounts);
            _coverageWeightingFactor = CoverageWeighting / _diploidCoverage;

            // Fresh coverage model; the field is assigned only once the model is fully initialised.
            var coverageModel = new CoverageModel();
            coverageModel.DiploidCoverage = _diploidCoverage;
            _model = coverageModel;

            // NOTE(review): this list is populated but never read afterwards in this method - confirm
            // whether the loop is still needed (the helpers it calls are not visible here).
            var segmentInfos = new List<SegmentInfo>();
            foreach (CanvasSegment segment in _allSegments)
            {
                var segmentInfo = new SegmentInfo();
                segmentInfo.Segment = segment;

                // Fold every frequency above 0.5 onto its complement.
                var foldedFrequencies = new List<double>();
                foreach (float frequency in segment.Balleles.Frequencies)
                {
                    if (frequency > 0.5)
                    {
                        foldedFrequencies.Add(1 - frequency);
                    }
                    else
                    {
                        foldedFrequencies.Add(frequency);
                    }
                }

                // -1 marks a segment without any frequency values.
                segmentInfo.Maf = foldedFrequencies.Count > 0 ? Utilities.Median(foldedFrequencies) : -1;

                segmentInfo.Coverage = Utilities.Median(segment.Counts);

                // Weight by length when there are more than 100 segments, otherwise by bin count.
                if (_allSegments.Count > 100)
                {
                    segmentInfo.Weight = segment.Length;
                }
                else
                {
                    segmentInfo.Weight = segment.BinCount;
                }

                segmentInfos.Add(segmentInfo);
            }

            AssignPloidyCallsDistance(_model);

            CanvasSegment.AssignQualityScores(_allSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);

            // Merging neighbouring segments with the same call needs quality scores, hence it runs
            // after the first scoring pass.
            var merged = CanvasSegment.MergeSegments(_allSegments);

            // Scores are recomputed on the merged segments (noted by the original author as improving
            // performance), then the filters are applied.
            CanvasSegment.AssignQualityScores(merged, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);
            CanvasSegment.SetFilterForSegments(QualityFilterThreshold, merged, CanvasFilter.SegmentSizeCutoff);

            var coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);
            CanvasSegment.WriteCoveragePlotData(merged, _model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (_cnOracle != null)
            {
                GenerateReportVersusKnownCopyNumber();
            }

            // The ploidy header line, when present, is the only extra header.
            var extraHeaders = new List<string>();
            if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegmentWriter.WriteSegments(outFile, merged, _model.DiploidCoverage, referenceFolder, sampleName,
                                              extraHeaders, ploidy, QualityFilterThreshold, false, null, null);
            return 0;
        }
Пример #5
0
        /// <summary>
        /// Reads segments and variant frequencies, assigns copy-number calls, merges neighbouring segments,
        /// assigns quality scores, and writes the coverage plot data and the segment output file.
        /// </summary>
        /// <param name="variantFrequencyFile">File handed to <c>CanvasIO.LoadVariantFrequencies</c>.</param>
        /// <param name="inFile">Segment file handed to <c>CanvasSegment.ReadSegments</c>; its directory becomes <c>TempFolder</c>.</param>
        /// <param name="outFile">Path handed to <c>CanvasSegment.WriteSegments</c>; also used to derive the coverage output path.</param>
        /// <param name="ploidyBedPath">Optional; when non-empty, ploidy is loaded from it.</param>
        /// <param name="referenceFolder">Forwarded to the writers.</param>
        /// <param name="sampleName">Forwarded to <c>CanvasSegment.WriteSegments</c>.</param>
        /// <param name="truthDataPath">Optional; when non-empty, known copy numbers are loaded and a report is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            // Optional truth data: load it up front so the report can be produced later.
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                this.CNOracle = new CopyNumberOracle();
                this.CNOracle.LoadKnownCN(truthDataPath);
            }

            this.Segments = CanvasSegment.ReadSegments(inFile);
            this.TempFolder = Path.GetDirectoryName(inFile);

            // Nothing to call: still emit an output file, then stop.
            if (this.Segments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, null, null);
                return 0;
            }

            PloidyInfo ploidy = string.IsNullOrEmpty(ploidyBedPath)
                ? null
                : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);

            // Load the variant frequencies into the segments.
            this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
            int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);

            // Set up the models for the different copy number states.
            this.InitializePloidies();

            // Statistics over the counts returned by AggregateCounts.
            float[] diploidCounts = AggregateCounts(ref this.Segments);
            DiploidCoverage = CanvasCommon.Utilities.Mean(diploidCounts);
            CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;

            // Fresh coverage model.
            this.Model = new CoverageModel();
            Model.DiploidCoverage = DiploidCoverage;

            // Summarise every segment (folded frequency median, coverage median, weight).
            List<SegmentInfo> segmentInfos = new List<SegmentInfo>();
            foreach (CanvasSegment segment in this.Segments)
            {
                SegmentInfo segmentInfo = new SegmentInfo();
                segmentInfo.Segment = segment;

                // Fold every frequency above 0.5 onto its complement.
                List<double> foldedFrequencies = new List<double>();
                foreach (float frequency in segment.VariantFrequencies)
                {
                    foldedFrequencies.Add(frequency > 0.5 ? 1 - frequency : frequency);
                }

                // -1 marks a segment without any frequency values.
                segmentInfo.MAF = foldedFrequencies.Count > 0 ? CanvasCommon.Utilities.Median(foldedFrequencies) : -1;

                segmentInfo.Coverage = CanvasCommon.Utilities.Median(segment.Counts);

                // Weight by length when there are more than 100 segments, otherwise by bin count.
                segmentInfo.Weight = this.Segments.Count > 100 ? segment.End - segment.Begin : segment.BinCount;

                segmentInfos.Add(segmentInfo);
            }

            // Assign copy number and major chromosome count for each segment.
            // The Gaussian mixture path is switched off for now because of odd results on chrY (CANV-115).
            bool useGaussianMixtureModel = false;

            if (!useGaussianMixtureModel)
            {
                AssignPloidyCallsDistance(Model, segmentInfos, medianVariantCoverage);
            }
            else
            {
                // Optimise the model covariance first; the returned value is not used.
                FitGaussians(Model, segmentInfos);
                AssignPloidyCallsGaussianMixture();
            }

            // Merge neighbouring segments that received the same copy number call, then score them.
            CanvasSegment.MergeSegments(ref this.Segments);
            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline);

            List<string> extraHeaders = new List<string>();
            string coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);

            CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (this.CNOracle != null)
            {
                this.GenerateReportVersusKnownCN();
            }

            // The ploidy header line, when present, is the only extra header.
            if (ploidy != null)
            {
                if (!string.IsNullOrEmpty(ploidy.HeaderLine))
                {
                    extraHeaders.Add(ploidy.HeaderLine);
                }
            }

            CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, extraHeaders, ploidy);
            return 0;
        }