/// <summary>
/// Merges the segments of every sample, using copy-number and q-score evidence pooled across all samples.
/// </summary>
/// <remarks>
/// For each segment index the copy numbers of all samples are collected into one list and the q-scores of
/// all samples are averaged. Those pooled lists are then handed, unchanged, to
/// <c>CanvasSegment.MergeSegments</c> once per sample together with a copy of that sample's own segment list.
/// NOTE(review): <c>segments.First()</c> is evaluated before anything else; if that is LINQ's
/// <c>Enumerable.First</c>, an empty sample map fails with <see cref="InvalidOperationException"/> — confirm
/// whether callers can ever pass an empty map.
/// </remarks>
/// <param name="segments">Per-sample segment lists. Segments are addressed by a shared index, so every
/// sample must contain the same number of segments as the first sample.</param>
/// <param name="minimumCallSize">Forwarded unchanged to <c>CanvasSegment.MergeSegments</c>.</param>
/// <param name="qScoreThreshold">Forwarded unchanged to <c>CanvasSegment.MergeSegments</c>.</param>
/// <returns>A new <c>SampleMap</c> holding, for every sample key, the merged segment list.</returns>
/// <exception cref="ArgumentException">A sample's segment count differs from the first sample's.</exception>
private static ISampleMap<List<CanvasSegment>> MergeSegments(ISampleMap<List<CanvasSegment>> segments, int minimumCallSize, int qScoreThreshold)
{
    int nSegments = segments.First().Value.Count;

    // The pooled lists below index every sample with the same segmentIndex, so a sample with a different
    // number of segments would either fail inside the indexer or be merged against lists of the wrong length.
    // (The previous null/length checks on the locally built lists could never fire.)
    if (segments.Any(s => s.Value.Count != nSegments))
    {
        throw new ArgumentException("All samples must have the same number of segments.");
    }

    var copyNumbers = new List<List<int>>(nSegments);
    var qscores = new List<double>(nSegments);
    for (int segmentIndex = 0; segmentIndex < nSegments; segmentIndex++)
    {
        // Both queries are materialised immediately, so capturing the loop variable is safe.
        copyNumbers.Add(segments.Select(s => s.Value[segmentIndex].CopyNumber).ToList());
        qscores.Add(segments.Select(s => s.Value[segmentIndex].QScore).Average());
    }

    var mergedSegments = new SampleMap<List<CanvasSegment>>();
    foreach (var sampleSegments in segments)
    {
        // 10000 is passed as the third argument; presumably the maximum gap allowed between segments
        // that may be merged — TODO confirm against CanvasSegment.MergeSegments.
        var mergedSegmentsThisSample = CanvasSegment.MergeSegments(sampleSegments.Value.ToList(), minimumCallSize, 10000, copyNumbers, qscores, qScoreThreshold);
        mergedSegments.Add(sampleSegments.Key, mergedSegmentsThisSample);
    }

    return mergedSegments;
}
/// <summary>
/// Builds a fixed set of segments on three chromosomes, runs <c>CanvasSegment.MergeSegments</c> on them and
/// checks that exactly the expected number of segments remains per chromosome.
/// </summary>
public void TestMergeSegments()
{
    // Every segment shares the same (empty) bin list; only coordinates and copy number matter here.
    var noBins = new List<SampleGenomicBin>();

    var inputSegments = new List<CanvasSegment>
    {
        // chr1: five contiguous segments, expected to collapse to three.
        new CanvasSegment("chr1", 1000000, 2000000, noBins) { CopyNumber = 2 },
        new CanvasSegment("chr1", 2000000, 2000100, noBins) { CopyNumber = 3 },
        new CanvasSegment("chr1", 2000100, 3000000, noBins) { CopyNumber = 2 },
        new CanvasSegment("chr1", 3000000, 3100000, noBins) { CopyNumber = 3 },
        new CanvasSegment("chr1", 3100000, 4000000, noBins) { CopyNumber = 2 },

        // chr2: segments separated by large gaps, so nothing may be merged.
        new CanvasSegment("chr2", 1000000, 2000000, noBins) { CopyNumber = 2 },
        new CanvasSegment("chr2", 3000000, 3000100, noBins) { CopyNumber = 3 },
        new CanvasSegment("chr2", 4000000, 5000000, noBins) { CopyNumber = 2 },

        // chr3: three contiguous segments with the same copy number, expected to become one.
        new CanvasSegment("chr3", 1000000, 2000000, noBins) { CopyNumber = 2 },
        new CanvasSegment("chr3", 2000000, 3000000, noBins) { CopyNumber = 2 },
        new CanvasSegment("chr3", 3000000, 4000000, noBins) { CopyNumber = 2 },
    };

    var merged = CanvasSegment.MergeSegments(inputSegments, 50000, 10000);
    var byChromosome = CanvasSegment.GetSegmentsByChromosome(merged);

    Assert.Equal(3, byChromosome["chr1"].Count);
    Assert.Equal(3, byChromosome["chr2"].Count);
    Assert.Single(byChromosome["chr3"]);
}
/// <summary>
/// Constructs several segments, invokes <c>CanvasSegment.MergeSegments</c> (which updates the list passed by
/// reference) and ensures that the expected merges, and no others, occurred.
/// </summary>
public void TestMergeSegments()
{
    List<CanvasSegment> allSegments = new List<CanvasSegment>();
    // All segments share one empty counts list; only coordinates and copy number are set.
    List<float> counts = new List<float>();

    // Chr1 gets five segments and we should merge to three:
    CanvasSegment seg = new CanvasSegment("chr1", 1000000, 2000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr1", 2000000, 2000100, counts);
    seg.CopyNumber = 3;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr1", 2000100, 3000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr1", 3000000, 3100000, counts);
    seg.CopyNumber = 3;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr1", 3100000, 4000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);

    // Chr2 gets segments with a large gap between, so can't merge:
    seg = new CanvasSegment("chr2", 1000000, 2000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr2", 3000000, 3000100, counts);
    seg.CopyNumber = 3;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr2", 4000000, 5000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);

    // Chr3 has three segments that all merge to 1 big one:
    seg = new CanvasSegment("chr3", 1000000, 2000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr3", 2000000, 3000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);
    seg = new CanvasSegment("chr3", 3000000, 4000000, counts);
    seg.CopyNumber = 2;
    allSegments.Add(seg);

    CanvasSegment.MergeSegments(ref allSegments, 50000, 10000);
    Dictionary<string, List<CanvasSegment>> segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(allSegments);

    // Assert.AreEqual takes (expected, actual); with the arguments reversed a failure message would
    // report the expected and actual counts the wrong way round.
    Assert.AreEqual(3, segmentsByChromosome["chr1"].Count);
    Assert.AreEqual(3, segmentsByChromosome["chr2"].Count);
    Assert.AreEqual(1, segmentsByChromosome["chr3"].Count);
}
/// <summary>
/// Reads segments and variant frequencies, assigns copy-number calls and quality scores, merges neighbouring
/// segments and writes the resulting calls plus coverage plot data.
/// </summary>
/// <param name="variantFrequencyFile">Path of the variant frequency file read through <c>CanvasIO.ReadFrequenciesWrapper</c>.</param>
/// <param name="inFile">Path of the segment file; its directory also becomes <c>TempFolder</c>.</param>
/// <param name="outFile">Path handed to <c>CanvasSegmentWriter.WriteSegments</c> for the output.</param>
/// <param name="ploidyVcfPath">Optional ploidy VCF; when null or empty no ploidy information is loaded.</param>
/// <param name="referenceFolder">Forwarded to the coverage plot and segment writers.</param>
/// <param name="sampleName">Forwarded to the segment writer.</param>
/// <param name="truthDataPath">Optional truth data; when given, a <c>CopyNumberOracle</c> is loaded and a report is generated.</param>
/// <returns>Always 0.</returns>
public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyVcfPath, string referenceFolder, string sampleName, string truthDataPath)
{
    // Known copy numbers are only loaded when a truth data path was supplied.
    if (!string.IsNullOrEmpty(truthDataPath))
    {
        _cnOracle = new CopyNumberOracle();
        _cnOracle.LoadKnownCN(truthDataPath);
    }

    _segments = Segments.ReadSegments(_logger, new FileLocation(inFile));
    _allSegments = _segments.AllSegments.ToList();
    TempFolder = Path.GetDirectoryName(inFile);

    // Nothing to call: still write an output file (with no extra headers and no ploidy) and stop.
    if (_allSegments.Count == 0)
    {
        Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
        CanvasSegmentWriter.WriteSegments(outFile, _allSegments, _model?.DiploidCoverage, referenceFolder, sampleName, null, null, QualityFilterThreshold, false, null, null);
        return 0;
    }

    PloidyInfo ploidy = string.IsNullOrEmpty(ploidyVcfPath)
        ? null
        : PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath);

    // Load the allele frequencies and attach them to the segments.
    var allelesByChromosome = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFile), _segments.IntervalsByChromosome);
    _segments.AddAlleles(allelesByChromosome);
    MeanCoverage = allelesByChromosome.SelectMany(x => x.Value).SelectMany(y => y.TotalCoverage).Average();
    AggregateVariantCoverage(ref _allSegments);

    // Create new models for different copy number states.
    InitializePloidies();

    // Coverage statistics: the mean of the aggregated counts defines the diploid coverage.
    float[] diploidCounts = AggregateCounts(ref _allSegments);
    _diploidCoverage = Utilities.Mean(diploidCounts);
    _coverageWeightingFactor = CoverageWeighting / _diploidCoverage;

    // New coverage model.
    _model = new CoverageModel { DiploidCoverage = _diploidCoverage };

    // Per-segment summary (median folded frequency, median coverage, weight).
    // NOTE(review): this list is populated but not read again within this method.
    List<SegmentInfo> segmentInfos = new List<SegmentInfo>();
    foreach (CanvasSegment segment in _allSegments)
    {
        SegmentInfo summary = new SegmentInfo();
        summary.Segment = segment;

        // Fold every frequency above 0.5 onto its complement.
        List<double> foldedFrequencies = new List<double>();
        foreach (float value in segment.Balleles.Frequencies)
        {
            if (value > 0.5)
            {
                foldedFrequencies.Add(1 - value);
            }
            else
            {
                foldedFrequencies.Add(value);
            }
        }

        // -1 marks a segment without any frequencies.
        if (foldedFrequencies.Count == 0)
        {
            summary.Maf = -1;
        }
        else
        {
            summary.Maf = Utilities.Median(foldedFrequencies);
        }

        summary.Coverage = Utilities.Median(segment.Counts);

        // With more than 100 segments the weight is the segment length, otherwise its bin count.
        if (_allSegments.Count > 100)
        {
            summary.Weight = segment.Length;
        }
        else
        {
            summary.Weight = segment.BinCount;
        }

        segmentInfos.Add(summary);
    }

    AssignPloidyCallsDistance(_model);
    CanvasSegment.AssignQualityScores(_allSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);

    // Merge neighboring segments that got the same copy number call.
    // Merging requires quality scores, so it happens only after they have been assigned.
    var mergedSegments = CanvasSegment.MergeSegments(_allSegments);

    // Recalculating q-scores after merging segments improves performance.
    CanvasSegment.AssignQualityScores(mergedSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);
    CanvasSegment.SetFilterForSegments(QualityFilterThreshold, mergedSegments, CanvasFilter.SegmentSizeCutoff);

    var coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);
    CanvasSegment.WriteCoveragePlotData(mergedSegments, _model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

    if (_cnOracle != null)
    {
        GenerateReportVersusKnownCopyNumber();
    }

    // The ploidy header line, when present, is the only extra header written.
    List<string> extraHeaders = new List<string>();
    if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
    {
        extraHeaders.Add(ploidy.HeaderLine);
    }

    CanvasSegmentWriter.WriteSegments(outFile, mergedSegments, _model.DiploidCoverage, referenceFolder, sampleName, extraHeaders, ploidy, QualityFilterThreshold, false, null, null);
    return 0;
}
/// <summary>
/// Reads segments and variant frequencies, assigns a copy-number call to each segment, merges neighbouring
/// segments, assigns quality scores and writes the calls plus coverage plot data.
/// </summary>
/// <param name="variantFrequencyFile">Path of the variant frequency file loaded through <c>CanvasIO.LoadVariantFrequencies</c>.</param>
/// <param name="inFile">Path of the segment file; its directory also becomes <c>TempFolder</c>.</param>
/// <param name="outFile">Path handed to <c>CanvasSegment.WriteSegments</c> for the output.</param>
/// <param name="ploidyBedPath">Optional ploidy BED file; when null or empty no ploidy information is loaded.</param>
/// <param name="referenceFolder">Forwarded to the coverage plot and segment writers.</param>
/// <param name="sampleName">Forwarded to the segment writer.</param>
/// <param name="truthDataPath">Optional truth data; when given, a <c>CopyNumberOracle</c> is loaded and a report is generated.</param>
/// <returns>Always 0.</returns>
public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName, string truthDataPath)
{
    // Known copy numbers are only loaded when a truth data path was supplied.
    if (!string.IsNullOrEmpty(truthDataPath))
    {
        CNOracle = new CopyNumberOracle();
        CNOracle.LoadKnownCN(truthDataPath);
    }

    Segments = CanvasSegment.ReadSegments(inFile);
    TempFolder = Path.GetDirectoryName(inFile);

    // Nothing to call: still write an output file (with no extra headers and no ploidy) and stop.
    if (Segments.Count == 0)
    {
        Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
        CanvasSegment.WriteSegments(outFile, Segments, referenceFolder, sampleName, null, null);
        return 0;
    }

    PloidyInfo ploidy = null;
    if (!string.IsNullOrEmpty(ploidyBedPath))
    {
        ploidy = PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
    }

    // Load variant frequencies into the segments; the loader's return value is kept as the mean coverage.
    MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, Segments);
    int medianVariantCoverage = AggregateVariantCoverage(ref Segments);

    // Create new models for different copy number states.
    InitializePloidies();

    // Coverage statistics: the mean of the aggregated counts defines the diploid coverage.
    float[] diploidCounts = AggregateCounts(ref Segments);
    DiploidCoverage = CanvasCommon.Utilities.Mean(diploidCounts);
    CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;

    // New coverage model.
    Model = new CoverageModel();
    Model.DiploidCoverage = DiploidCoverage;

    // Per-segment summary (median folded frequency, median coverage, weight) used for ploidy assignment.
    List<SegmentInfo> segmentInfos = new List<SegmentInfo>();
    foreach (CanvasSegment segment in Segments)
    {
        SegmentInfo summary = new SegmentInfo();
        summary.Segment = segment;

        // Fold every frequency above 0.5 onto its complement.
        List<double> foldedFrequencies = new List<double>();
        foreach (float value in segment.VariantFrequencies)
        {
            if (value > 0.5)
            {
                foldedFrequencies.Add(1 - value);
            }
            else
            {
                foldedFrequencies.Add(value);
            }
        }

        // -1 marks a segment without any variant frequencies.
        if (foldedFrequencies.Count == 0)
        {
            summary.MAF = -1;
        }
        else
        {
            summary.MAF = CanvasCommon.Utilities.Median(foldedFrequencies);
        }

        summary.Coverage = CanvasCommon.Utilities.Median(segment.Counts);

        // With more than 100 segments the weight is the segment span, otherwise its bin count.
        if (Segments.Count > 100)
        {
            summary.Weight = segment.End - segment.Begin;
        }
        else
        {
            summary.Weight = segment.BinCount;
        }

        segmentInfos.Add(summary);
    }

    // Assign copy number and major chromosome count for each segment.
    // The Gaussian mixture path is switched off for now because of weird performance on chrY (CANV-115).
    bool useGaussianMixtureModel = false;
    if (!useGaussianMixtureModel)
    {
        AssignPloidyCallsDistance(Model, segmentInfos, medianVariantCoverage);
    }
    else
    {
        // Optimize model covariance.
        double likelihood = FitGaussians(Model, segmentInfos);
        AssignPloidyCallsGaussianMixture();
    }

    // Merge neighboring segments that got the same copy number call, then score the merged segments.
    CanvasSegment.MergeSegments(ref Segments);
    CanvasSegment.AssignQualityScores(Segments, CanvasSegment.QScoreMethod.LogisticGermline);

    string coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);
    CanvasSegment.WriteCoveragePlotData(Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

    if (CNOracle != null)
    {
        GenerateReportVersusKnownCN();
    }

    // The ploidy header line, when present, is the only extra header written.
    List<string> extraHeaders = new List<string>();
    if (ploidy != null)
    {
        if (!string.IsNullOrEmpty(ploidy.HeaderLine))
        {
            extraHeaders.Add(ploidy.HeaderLine);
        }
    }

    CanvasSegment.WriteSegments(outFile, Segments, referenceFolder, sampleName, extraHeaders, ploidy);
    return 0;
}