/// <summary>
/// Runs <c>PostProcessSegments</c> on the given input and passes the resulting segments,
/// together with the output path, to <c>WriteCanvasPartitionResults</c>.
/// </summary>
/// <param name="segmentationInput">Input whose segments are post-processed and written.</param>
/// <param name="outPartitionedFile">Path handed to the partition writer.</param>
/// <param name="referencePloidy">Reference ploidy forwarded to post-processing (may be null).</param>
/// <param name="segmentationResults">Raw segmentation to post-process.</param>
private static void PostProcessAndWriteResults(
    SegmentationInput segmentationInput,
    string outPartitionedFile,
    PloidyInfo referencePloidy,
    GenomeSegmentationResults segmentationResults)
{
    segmentationInput.WriteCanvasPartitionResults(
        outPartitionedFile,
        segmentationInput.PostProcessSegments(segmentationResults, referencePloidy));
}
// With no input segments the calculator must not emit any bedgraph entry.
public void NoSegments_ReturnsNoBedGraphEntries()
{
    // Arrange
    var noSegments = new List<CanvasSegment>();
    var bedGraphCalculator = new CopyNumberBedGraphCalculator();

    // Act
    var entries = bedGraphCalculator.Calculate(noSegments, new PloidyInfo());

    // Assert
    Assert.Empty(entries);
}
/// <summary>
/// Stores each already-computed value unchanged in the property of the same name.
/// </summary>
private SampleMetrics(
    double meanCoverage,
    double meanMafCoverage,
    double variance,
    double mafVariance,
    int maxCoverage,
    PloidyInfo ploidy)
{
    // Bin-coverage statistics.
    this.MeanCoverage = meanCoverage;
    this.Variance = variance;
    this.MaxCoverage = maxCoverage;

    // Statistics from the B-allele (MAF) coverage.
    this.MeanMafCoverage = meanMafCoverage;
    this.MafVariance = mafVariance;

    this.Ploidy = ploidy;
}
/// <summary>
/// Looks up the reference copy number for the region <paramref name="chrom"/>:<paramref name="start"/>-<paramref name="end"/>.
/// </summary>
/// <param name="referencePloidy">Ploidy information to query; when null, <paramref name="defaultPloidy"/> is returned.</param>
/// <param name="chrom">Chromosome name of the region.</param>
/// <param name="start">Region start, passed through to the temporary <c>CanvasSegment</c>.</param>
/// <param name="end">Region end, passed through to the temporary <c>CanvasSegment</c>.</param>
/// <param name="defaultPloidy">Value returned when no ploidy information is supplied.</param>
/// <returns>The result of <c>GetReferenceCopyNumber</c> for the region, or the default.</returns>
public static int GetPloidy(PloidyInfo referencePloidy, string chrom, int start, int end, int defaultPloidy = 2)
{
    if (referencePloidy != null)
    {
        // The lookup API takes a segment, so wrap the coordinates in one without any counts.
        var queryRegion = new CanvasSegment(chrom, start, end, new List<float>());
        return referencePloidy.GetReferenceCopyNumber(queryRegion);
    }

    return defaultPloidy;
}
/// <summary>
/// Converts ratio bins to count bins and writes them to <paramref name="outputPath"/>.
/// </summary>
/// <param name="ratios">Ratio bins to convert.</param>
/// <param name="referencePloidyBedFile">Optional ploidy BED file; ignored when null or when it does not exist.</param>
/// <param name="outputPath">Location whose <c>FullName</c> is passed to <c>CanvasIO.WriteToTextFile</c>.</param>
public static void RatiosToCounts(IEnumerable<SampleGenomicBin> ratios, IFileLocation referencePloidyBedFile, IFileLocation outputPath)
{
    bool hasPloidyBed = referencePloidyBedFile != null && referencePloidyBedFile.Exists;
    PloidyInfo referencePloidy = hasPloidyBed
        ? PloidyInfo.LoadPloidyFromBedFile(referencePloidyBedFile.FullName)
        : null;

    IEnumerable<SampleGenomicBin> counts = RatiosToCounts(ratios, referencePloidy);
    CanvasIO.WriteToTextFile(outputPath.FullName, counts);
}
/// <summary>
/// Post-processes segmentation results for this input by delegating to the configured processor,
/// supplying this input's coverage and (if configured) its forbidden intervals.
/// </summary>
/// <param name="segmentationResults">Raw segmentation to post-process.</param>
/// <param name="referencePloidy">Reference ploidy forwarded to the processor (may be null).</param>
/// <returns>Whatever the processor returns: segments with bins, keyed by chromosome.</returns>
public Dictionary<string, List<SegmentWithBins>> PostProcessSegments(
    GenomeSegmentationResults segmentationResults,
    PloidyInfo referencePloidy)
{
    // No forbidden-interval BED configured => pass an empty map.
    Dictionary<string, List<SampleGenomicBin>> excludedIntervals = string.IsNullOrEmpty(ForbiddenIntervalBedPath)
        ? new Dictionary<string, List<SampleGenomicBin>>()
        : CanvasCommon.Utilities.LoadBedFile(ForbiddenIntervalBedPath);

    return _processor.PostProcessSegments(segmentationResults, referencePloidy, excludedIntervals, CoverageInfo);
}
/// <summary>
/// Computes per-sample summary statistics from the given segments and bundles them,
/// together with the sample's ploidy information, into a <c>SampleMetrics</c>.
/// </summary>
/// <param name="segments">Segments to summarize. An empty list makes the <c>Max()</c> call below throw.</param>
/// <param name="ploidyBedPath">
/// Optional path to a ploidy file. NOTE(review): despite the parameter name, the file is loaded
/// with <c>PloidyInfo.LoadPloidyFromVcfFile</c> - confirm callers pass a VCF.
/// </param>
/// <param name="numberOfTrimmedBins">Forwarded to <c>TruncatedMedianCount</c> for every segment.</param>
/// <param name="id">Sample id; its string form selects the sample when loading ploidy.</param>
/// <returns>The assembled sample metrics.</returns>
public static SampleMetrics GetSampleInfo(IReadOnlyList<CanvasSegment> segments, string ploidyBedPath, int numberOfTrimmedBins, SampleId id)
{
    double meanMafCoverage = new SortedList<int>(segments.SelectMany(x => x.Balleles.TotalCoverage)).Median();

    // Compute the truncated medians once; they feed both the variance and the maximum.
    var truncatedMedians = segments.Select(x => x.TruncatedMedianCount(numberOfTrimmedBins)).ToList();
    double variance = Utilities.Variance(truncatedMedians);

    double mafVariance = Utilities.Variance(segments
        .Where(x => x.Balleles.TotalCoverage.Count > 0)
        .Select(x => x.Balleles.TotalCoverage.Average())
        .ToList());

    double meanCoverage = new SortedList<float>(segments.SelectMany(x => x.Counts)).Median();

    // Convert.ToInt32 rather than Convert.ToInt16: the target is an int, and ToInt16 throws
    // OverflowException as soon as the largest truncated median exceeds short.MaxValue (32767).
    int maxCoverage = Convert.ToInt32(truncatedMedians.Max()) + 10;

    var ploidy = new PloidyInfo();
    if (!ploidyBedPath.IsNullOrEmpty() && File.Exists(ploidyBedPath))
    {
        ploidy = PloidyInfo.LoadPloidyFromVcfFile(ploidyBedPath, id.ToString());
    }

    return new SampleMetrics(meanCoverage, meanMafCoverage, variance, mafVariance, maxCoverage, ploidy);
}
/// <summary>
/// Decides whether the bin [<paramref name="start"/>, <paramref name="end"/>] opens a new segment.
/// A new segment starts when any of the following holds:
/// the bin start is a registered segment start; the midpoint of the current excluded interval lies
/// between the previous bin end and this bin's end; the gap to the previous bin exceeds
/// <c>_maxInterBinDistInSegment</c>; or the reference ploidy is not uniform across the span.
/// </summary>
/// <param name="starts">Segment start keys of the form "chr:start".</param>
/// <param name="chr">Chromosome of the bin.</param>
/// <param name="excludeIntervals">Excluded intervals for this chromosome, or null.</param>
/// <param name="previousBinEnd">End of the previous bin on this chromosome (0 if none).</param>
/// <param name="end">End of the current bin.</param>
/// <param name="start">Start of the current bin.</param>
/// <param name="excludeIndex">
/// Cursor into <paramref name="excludeIntervals"/>; advanced here past intervals that stop before
/// <paramref name="previousBinEnd"/>, and kept by the caller between calls.
/// </param>
/// <param name="referencePloidy">Reference ploidy, or null to skip the ploidy check.</param>
/// <returns>True when the bin should start a new segment.</returns>
private bool IsNewSegment(Dictionary<string, bool> starts, string chr, List<SampleGenomicBin> excludeIntervals, uint previousBinEnd, uint end, uint start, ref int excludeIndex, PloidyInfo referencePloidy)
{
    bool startsSegment = starts.ContainsKey(chr + ":" + start);

    // The exclusion cursor must be advanced on every call, even if we already know the answer.
    if (excludeIntervals != null)
    {
        while (excludeIndex < excludeIntervals.Count && excludeIntervals[excludeIndex].Stop < previousBinEnd)
        {
            excludeIndex++;
        }

        if (excludeIndex < excludeIntervals.Count)
        {
            // Note: forbiddenZoneMid should never fall inside a bin, because these intervals were already excluded
            // from consideration during the call to CanvasBin.
            var currentExclusion = excludeIntervals[excludeIndex];
            int forbiddenZoneMid = (currentExclusion.Start + currentExclusion.Stop) / 2;

            // Deliberately left disabled:
            //Debug.Assert(previousBinEnd <= excludeStart);
            //Debug.Assert(start >= excludeStop);
            if (previousBinEnd < forbiddenZoneMid && end >= forbiddenZoneMid)
            {
                startsSegment = true;
            }
        }
    }

    if (startsSegment)
    {
        return true;
    }

    // Too large a gap between consecutive bins also breaks the segment.
    bool gapTooWide = previousBinEnd > 0
                      && _maxInterBinDistInSegment >= 0
                      && previousBinEnd + _maxInterBinDistInSegment < start;
    if (gapTooWide)
    {
        return true;
    }

    if (referencePloidy == null)
    {
        return false;
    }

    // also start new segment if reference ploidy changes between end of last and end of this;
    // note that Interval takes 1-based positions, so using "previousBinEnd" effectively
    // includes the last base of the previous bin, allowing for a change at the bin boundary
    var refIval = new ReferenceInterval(chr, new Interval(previousBinEnd > 0 ? previousBinEnd : 1, end));
    return !referencePloidy.IsUniformReferencePloidy(refIval);
}
/// <summary>
/// Tells whether a segment is a passing variant: it must pass its filter and either differ from the
/// reference copy number or be copy number 2 with a major chromosome count of 2 (LOH).
/// </summary>
/// <param name="segment">Segment to classify.</param>
/// <param name="ploidyInfo">Source of the reference copy number; 2 is assumed when this is null.</param>
/// <returns>True for a passing variant segment, false otherwise.</returns>
private bool IsPassVariant(CanvasSegment segment, PloidyInfo ploidyInfo)
{
    if (!segment.Filter.IsPass)
    {
        return false;
    }

    var expectedCopyNumber = ploidyInfo?.GetReferenceCopyNumber(segment) ?? 2;
    bool differsFromReference = segment.CopyNumber != expectedCopyNumber;
    bool isLoh = segment.CopyNumber == 2 && segment.MajorChromosomeCount == 2;

    return differsFromReference || isLoh;
}
/// <summary>
/// Reads the tumor and normal binned files line by line in lock-step and writes, for every bin
/// whose normal count is at least 1, the normalized tumor/normal ratio to <paramref name="ratioBinnedPath"/>.
/// </summary>
/// <param name="tumorBinnedPath">Gzipped, tab-separated tumor bins (count in column 4).</param>
/// <param name="normalBinnedPath">Gzipped, tab-separated normal bins (chrom, start, end, count, ...).</param>
/// <param name="ratioBinnedPath">Output path; each written line is the normal line with column 4 replaced by the ratio.</param>
/// <param name="ploidyBedPath">Optional ploidy BED; when null or empty the default ploidy of <c>GetPloidy</c> is used.</param>
/// <param name="manifest">Optional manifest forwarded to <c>BinCounts</c>.</param>
/// <exception cref="System.IO.InvalidDataException">
/// The tumor file ends before the normal file, so the two files cannot describe the same bins.
/// </exception>
private static void GetBinRatio(string tumorBinnedPath, string normalBinnedPath, string ratioBinnedPath, string ploidyBedPath, NexteraManifest manifest = null)
{
    PloidyInfo referencePloidy = String.IsNullOrEmpty(ploidyBedPath) ? null : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
    double tumorMedian = (new BinCounts(tumorBinnedPath, manifest: manifest)).OnTargetMedianBinCount;
    double normalMedian = (new BinCounts(normalBinnedPath, manifest: manifest)).OnTargetMedianBinCount;

    // Only rescale when both medians are usable; otherwise leave the ratio unscaled.
    double librarySizeFactor = (tumorMedian > 0 && normalMedian > 0) ? normalMedian / tumorMedian : 1;

    using (GzipReader tumorReader = new GzipReader(tumorBinnedPath))
    using (GzipReader normalReader = new GzipReader(normalBinnedPath))
    using (GzipWriter writer = new GzipWriter(ratioBinnedPath))
    {
        string normalLine;
        while ((normalLine = normalReader.ReadLine()) != null)
        {
            // The tumor reader is advanced for every normal line (also for skipped bins) to stay in sync.
            string tumorLine = tumorReader.ReadLine();
            if (tumorLine == null)
            {
                // Previously this surfaced as a NullReferenceException on tumorLine.Split.
                throw new System.IO.InvalidDataException(String.Format(
                    "Tumor bin file {0} has fewer lines than normal bin file {1}.", tumorBinnedPath, normalBinnedPath));
            }

            string[] normalToks = normalLine.Split('\t');
            string[] tumorToks = tumorLine.Split('\t');
            double normalCount = double.Parse(normalToks[3]);
            double tumorCount = double.Parse(tumorToks[3]);

            // The weighted average count of a bin could be less than 1.
            // Using these small counts for coverage normalization creates large ratios.
            // It would be better to just drop these bins so we don't introduce too much noise into segmentation and CNV calling.
            if (normalCount < 1)
            {
                continue; // skip the bin
            }

            string chrom = normalToks[0];
            int start = int.Parse(normalToks[1]);
            int end = int.Parse(normalToks[2]);

            // Scale by the reference ploidy of the bin relative to diploid.
            double factor = CanvasDiploidBinRatioFactor * GetPloidy(referencePloidy, chrom, start, end) / 2.0;
            double ratio = tumorCount / normalCount * factor * librarySizeFactor;

            normalToks[3] = String.Format("{0}", ratio);
            writer.WriteLine(String.Join("\t", normalToks));
        }
    }
}
// A passing segment with copy number 2 and a default PloidyInfo must not yield a bedgraph entry.
public void ReferenceCopyNumber_IsExcluded()
{
    // Arrange
    var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
    var referenceSegment = new CanvasSegment("chr1", 0, 1, bins)
    {
        CopyNumber = 2,
        Filter = CanvasFilter.PassFilter
    };
    var bedGraphCalculator = new CopyNumberBedGraphCalculator();

    // Act
    var entries = bedGraphCalculator
        .Calculate(new List<CanvasSegment> { referenceSegment }, new PloidyInfo())
        .ToList();

    // Assert
    Assert.Empty(entries);
}
// A segment carrying a non-pass filter must be dropped even though its copy number (0) is a variant.
public void FiltersNonPassSegments()
{
    // Arrange
    var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
    var filteredSegment = new CanvasSegment("chr1", 0, 1, bins)
    {
        CopyNumber = 0,
        Filter = CanvasFilter.Create("NonPass".Yield())
    };
    var bedGraphCalculator = new CopyNumberBedGraphCalculator();

    // Act
    var entries = bedGraphCalculator.Calculate(new List<CanvasSegment> { filteredSegment }, new PloidyInfo());

    // Assert
    Assert.Empty(entries);
}
// Copy number 2 with major chromosome count 2 (LOH) must be reported, with value 2.
public void LOH_IsIncluded()
{
    // Arrange
    var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
    var lohSegment = new CanvasSegment("chr1", 0, 1, bins)
    {
        CopyNumber = 2,
        MajorChromosomeCount = 2,
        Filter = CanvasFilter.PassFilter
    };
    var bedGraphCalculator = new CopyNumberBedGraphCalculator();

    // Act
    var entries = bedGraphCalculator
        .Calculate(new List<CanvasSegment> { lohSegment }, new PloidyInfo())
        .ToList();

    // Assert
    Assert.Equal(2m, entries.First().Value);
}
// A passing copy-number-1 segment must be reported with its chromosome, interval and copy number.
public void VariantCopyNumber_ReturnsCopyNumber()
{
    // Arrange
    var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
    var lossSegment = new CanvasSegment("chr1", 0, 1, bins)
    {
        CopyNumber = 1,
        Filter = CanvasFilter.PassFilter
    };
    var bedGraphCalculator = new CopyNumberBedGraphCalculator();

    // Act
    var entries = bedGraphCalculator
        .Calculate(new List<CanvasSegment> { lossSegment }, new PloidyInfo())
        .ToList();

    // Assert
    var firstEntry = entries.First();
    Assert.Equal("chr1", firstEntry.Chromosome);
    Assert.Equal(new BedInterval(0, 1), firstEntry.Interval);
    Assert.Equal(1m, firstEntry.Value);
}
/// <summary>
/// Lazily converts each ratio bin into a count bin by multiplying its value with
/// <c>CanvasDiploidBinRatioFactor * ploidy / 2</c>, where ploidy comes from <c>GetPloidy</c>.
/// </summary>
/// <param name="ratios">Ratio bins to convert; enumerated only as the result is enumerated.</param>
/// <param name="referencePloidy">Reference ploidy passed to <c>GetPloidy</c> (may be null).</param>
/// <returns>One new bin per input bin with the same coordinates and GC and the scaled value.</returns>
private static IEnumerable<SampleGenomicBin> RatiosToCounts(IEnumerable<SampleGenomicBin> ratios, PloidyInfo referencePloidy)
{
    foreach (SampleGenomicBin bin in ratios)
    {
        // get the normal ploidy
        int ploidy = GetPloidy(referencePloidy, bin.GenomicBin.Chromosome, bin.Start, bin.Stop);
        double scale = CanvasDiploidBinRatioFactor * ploidy / 2.0;
        double scaledCount = bin.Count * scale;

        yield return new SampleGenomicBin(
            bin.GenomicBin.Chromosome,
            bin.Start,
            bin.Stop,
            bin.GenomicBin.GC,
            (float)scaledCount);
    }
}
/// <summary>
/// Command-line entry point of CanvasPartition: parses the options, validates the input files,
/// runs the selected segmentation method and writes one partitioned file per output path.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 on success or after showing help (including when required arguments are missing);
/// 1 when a referenced file does not exist.
/// </returns>
static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);
    List<string> cleanedFiles = new List<string>();
    List<string> outPartitionedFiles = new List<string>();
    List<string> vafFiles = new List<string>();
    bool needHelp = false;
    bool isGermline = false;
    string filterBedFile = null;
    string referenceFolder = null;
    string commonCNVsbedPath = null;
    string evennessMetricFile = null;
    SegmentSplitUndo undoMethod = SegmentSplitUndo.None;
    SegmentationInput.SegmentationMethod partitionMethod = SegmentationInput.SegmentationMethod.Wavelets;
    string parameterconfigPath = Path.Combine(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPartition)), "CanvasPartitionParameters.json");
    string ploidyVcfPath = null;

    OptionSet p = new OptionSet()
    {
        { "i|infile=", "input file - usually generated by CanvasClean", v => cleanedFiles.Add(v) },
        { "v|vaffile=", "variant frequencyfiles - usually generated by CanvasSNV", v => vafFiles.Add(v) },
        { "o|outfile=", "text file to output", v => outPartitionedFiles.Add(v) },
        { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (SegmentationInput.SegmentationMethod)Enum.Parse(typeof(SegmentationInput.SegmentationMethod), v) },
        { "r|reference=", "folder that contains both genome.fa and GenomeSize.xml", v => referenceFolder = v },
        { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
        { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => filterBedFile = v },
        { "c|commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCNVsbedPath = v },
        { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
        { $"{CommandLineOptions.EvennessMetricFile}=", "output file for evenness metric (optional)", v => evennessMetricFile = v },
        { "p|ploidyVcfFile=", "vcf file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyVcfPath = v },
        // Fixed: the description was missing the '$', so the default path was never interpolated.
        { "config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v },
        { "h|help", "show this message and exit", v => needHelp = v != null }
    };

    List<string> extraArgs = p.Parse(args);
    if (extraArgs.Any())
    {
        throw new IlluminaException($"Unknown arguments: {string.Join(",", extraArgs)}");
    }

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    // Missing required arguments: show usage. The exit code stays 0 as before.
    if (!cleanedFiles.Any() || !outPartitionedFiles.Any() || referenceFolder == null)
    {
        ShowHelp(p);
        return 0;
    }

    // Fixed: report the file that is actually missing instead of formatting the whole list
    // (which printed the List<string> type name).
    string missingInputFile = cleanedFiles.FirstOrDefault(inFile => !File.Exists(inFile));
    if (missingInputFile != null)
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", missingInputFile);
        return 1;
    }

    if (!string.IsNullOrEmpty(filterBedFile) && !File.Exists(filterBedFile))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", filterBedFile);
        return 1;
    }

    if (!File.Exists(parameterconfigPath))
    {
        // Fixed: the message named CanvasPedigreeCaller.exe instead of this program.
        Console.WriteLine($"CanvasPartition.exe: File {parameterconfigPath} does not exist! Exiting.");
        return 1;
    }

    if (!string.IsNullOrEmpty(ploidyVcfPath) && !File.Exists(ploidyVcfPath))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", ploidyVcfPath);
        return 1;
    }

    var parameterconfigFile = new FileLocation(parameterconfigPath);
    var canvasPartitionParameters = Deserialize<CanvasPartitionParameters>(parameterconfigFile);
    ILogger logger = new Logger(Console.Out.ToEnumerable(), Console.Error.ToEnumerable());
    var processor = new SegmentationResultsProcessor(canvasPartitionParameters.MaxInterBinDistInSegment);

    // VAF files are only used when exactly one was given per input file; otherwise they are ignored.
    var segmentationInputs = vafFiles.Count > 0 && vafFiles.Count == cleanedFiles.Count
        ? cleanedFiles.Zip(vafFiles, (inFile, vafFile) => new SegmentationInput(inFile, vafFile, filterBedFile, referenceFolder, evennessMetricFile, logger, processor)).ToList()
        : cleanedFiles.Select(inFile => new SegmentationInput(inFile, null, filterBedFile, referenceFolder, evennessMetricFile, logger, processor)).ToList();

    GenomeSegmentationResults segmentationResults;
    PloidyInfo referencePloidy = ploidyVcfPath != null ? PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath) : null;

    switch (partitionMethod)
    {
        default: // use Wavelets if CBS is not selected
            Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
            var waveletsRunner = new WaveletsRunner(new WaveletsRunner.WaveletsRunnerParams(isGermline, commonCNVsbedPath,
                madFactor: canvasPartitionParameters.MadFactor,
                thresholdLowerMaf: canvasPartitionParameters.ThresholdLowerMaf,
                evennessScoreThreshold: canvasPartitionParameters.EvennessScoreThreshold,
                verbose: 2));
            // Single() enforces exactly one input and one output for this method.
            segmentationResults = new GenomeSegmentationResults(waveletsRunner.Run(segmentationInputs.Single(), canvasPartitionParameters.EvennessScoreWindow));
            PostProcessAndWriteResults(segmentationInputs.Single(), outPartitionedFiles.Single(), referencePloidy, segmentationResults);
            break;

        case SegmentationInput.SegmentationMethod.CBS:
        {
            Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
            var cbsRunner = new CBSRunner(canvasPartitionParameters.MaxInterBinDistInSegment, undoMethod, canvasPartitionParameters.CBSalpha);
            var sampleSegmentations = new List<GenomeSegmentationResults>();
            foreach (var input in segmentationInputs)
            {
                var segmentation = new GenomeSegmentationResults(cbsRunner.Run(input, verbose: 2));
                sampleSegmentations.Add(segmentation);
            }

            segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
            foreach (var (segmentationInput, outPartitionedFile) in segmentationInputs.Zip(outPartitionedFiles))
            {
                PostProcessAndWriteResults(segmentationInput, outPartitionedFile, referencePloidy, segmentationResults);
            }

            break;
        }

        case SegmentationInput.SegmentationMethod.HMM:
        {
            Console.WriteLine("{0} Running HMM Partitioning", DateTime.Now);
            var hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(cleanedFiles.Count);
            bool isPerSample = false;
            segmentationResults = new GenomeSegmentationResults(hiddenMarkovModelsRunner.Run(segmentationInputs, isPerSample));
            for (int i = 0; i < segmentationInputs.Count; i++)
            {
                PostProcessAndWriteResults(segmentationInputs[i], outPartitionedFiles[i], referencePloidy, segmentationResults);
            }

            break;
        }

        case SegmentationInput.SegmentationMethod.PerSampleHMM:
        {
            Console.WriteLine("{0} Running Per-sample HMM Partitioning", DateTime.Now);
            var hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(1);
            var sampleSegmentations = new List<GenomeSegmentationResults>();
            bool isPerSample = true;
            foreach (var input in segmentationInputs)
            {
                var segmentation = new GenomeSegmentationResults(
                    hiddenMarkovModelsRunner.Run(input.Yield().ToList(), isPerSample));
                sampleSegmentations.Add(segmentation);
            }

            segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
            foreach (var (segmentationInput, outPartitionedFile) in segmentationInputs.Zip(outPartitionedFiles))
            {
                PostProcessAndWriteResults(segmentationInput, outPartitionedFile, referencePloidy, segmentationResults);
            }

            break;
        }
    }

    Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
    return 0;
}
/// <summary>
/// Produces one bedgraph entry (via <c>GetCopyNumberEntry</c>) for every segment that
/// <c>IsPassVariant</c> accepts. The result is a deferred LINQ query.
/// </summary>
/// <param name="segments">Segments to inspect.</param>
/// <param name="ploidyInfo">Reference ploidy used when classifying each segment.</param>
/// <returns>Entries for the passing variant segments, in input order.</returns>
public IEnumerable<BedGraphEntry> Calculate(IReadOnlyList<CanvasSegment> segments, PloidyInfo ploidyInfo)
{
    return segments
        .Where(candidate => IsPassVariant(candidate, ploidyInfo))
        .Select(GetCopyNumberEntry);
}
/// <summary>
/// Assigns every coverage bin to a segment. Bins are walked per chromosome in order and a new
/// segment is opened whenever <c>IsNewSegment</c> says so (segment start, forbidden zone,
/// inter-bin gap or reference ploidy change); otherwise the bin is appended to the current segment.
/// </summary>
/// <param name="segmentationResults">Raw segmentation; only the segment start positions are used here.</param>
/// <param name="referencePloidy">Reference ploidy forwarded to <c>IsNewSegment</c> (may be null).</param>
/// <param name="excludedIntervals">Excluded intervals keyed by chromosome; chromosomes may be absent.</param>
/// <param name="coverageInfo">Per-chromosome bin starts, ends and coverage values (parallel arrays).</param>
/// <returns>The segments with their bins, keyed by chromosome, for every chromosome in the coverage data.</returns>
public Dictionary<string, List<SegmentWithBins>> PostProcessSegments(
    GenomeSegmentationResults segmentationResults,
    PloidyInfo referencePloidy,
    Dictionary<string, List<SampleGenomicBin>> excludedIntervals,
    CoverageInfo coverageInfo)
{
    // Index the segment start positions as "chr:start" keys for IsNewSegment.
    // (The former "stops" index was never read and has been removed.)
    var starts = new Dictionary<string, bool>();
    foreach (string chr in segmentationResults.SegmentByChr.Keys)
    {
        foreach (var segment in segmentationResults.SegmentByChr[chr])
        {
            starts[chr + ":" + segment.start] = true;
        }
    }

    int segmentNum = -1;
    var segmentsByChromosome = new Dictionary<string, List<SegmentWithBins>>();
    foreach (string chr in coverageInfo.StartByChr.Keys)
    {
        var chromosomeSegments = new List<SegmentWithBins>();
        segmentsByChromosome.Add(chr, chromosomeSegments);
        SegmentWithBins currentSegment = null;

        // Stays null when the chromosome has no excluded intervals.
        List<SampleGenomicBin> excludeIntervals;
        excludedIntervals.TryGetValue(chr, out excludeIntervals);

        var excludeIndex = 0; // Points to the first interval which *doesn't* end before our current position
        uint previousBinEnd = 0;
        for (int binIndex = 0; binIndex < coverageInfo.StartByChr[chr].Length; binIndex++)
        {
            uint start = coverageInfo.StartByChr[chr][binIndex];
            uint end = coverageInfo.EndByChr[chr][binIndex];
            bool newSegment = IsNewSegment(starts, chr, excludeIntervals, previousBinEnd, end, start, ref excludeIndex, referencePloidy);
            var bin = new Bin(start, end, coverageInfo.CoverageByChr[chr][binIndex]);

            // A segment is also opened implicitly when the first bin of a chromosome is not a
            // segment start. It gets a fresh number too: previously it reused the stale counter,
            // i.e. -1 at the very beginning or the number of the previous chromosome's last segment.
            if (newSegment || currentSegment == null)
            {
                segmentNum++;
                currentSegment = new SegmentWithBins(segmentNum, bin);
                chromosomeSegments.Add(currentSegment);
            }
            else
            {
                currentSegment.AddBin(bin);
            }

            previousBinEnd = end;
        }
    }

    return segmentsByChromosome;
}
/// <summary>
/// Calculates the bedgraph entries for the segments and passes them to the writer for <paramref name="location"/>.
/// </summary>
/// <param name="segments">Segments handed to the calculator.</param>
/// <param name="ploidyInfo">Reference ploidy handed to the calculator.</param>
/// <param name="location">Destination handed to the writer.</param>
public void Write(IReadOnlyList<CanvasSegment> segments, PloidyInfo ploidyInfo, BgzfFile location)
{
    _writer.Write(_calculator.Calculate(segments, ploidyInfo), location);
}
/// <summary>
/// Reads segments from <paramref name="inFile"/>, assigns copy number calls to them, merges
/// neighbouring segments, scores them and writes the calls to <paramref name="outFile"/>.
/// </summary>
/// <param name="variantFrequencyFile">Variant frequency file loaded onto the segments.</param>
/// <param name="inFile">Segment input file; its directory becomes <c>TempFolder</c>.</param>
/// <param name="outFile">Output path for the segment calls.</param>
/// <param name="ploidyBedPath">Optional ploidy BED file; skipped when null or empty.</param>
/// <param name="referenceFolder">Reference folder forwarded to the writers.</param>
/// <param name="sampleName">Sample name forwarded to the segment writer.</param>
/// <param name="truthDataPath">Optional known copy number data; enables the comparison report.</param>
/// <returns>Always 0.</returns>
public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName, string truthDataPath)
{
    bool hasTruthData = !string.IsNullOrEmpty(truthDataPath);
    if (hasTruthData)
    {
        this.CNOracle = new CopyNumberOracle();
        this.CNOracle.LoadKnownCN(truthDataPath);
    }

    this.Segments = CanvasSegment.ReadSegments(inFile);
    this.TempFolder = Path.GetDirectoryName(inFile);

    // Nothing to call: still write an (empty) output so downstream steps find the file.
    if (this.Segments.Count == 0)
    {
        Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
        CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, null, null);
        return 0;
    }

    PloidyInfo ploidy = string.IsNullOrEmpty(ploidyBedPath)
        ? null
        : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);

    // load MAF
    this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
    int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);

    // Create new models for different copy number states
    this.InitializePloidies();

    // Compute statistics on the copy number two regions
    float[] diploidCounts = AggregateCounts(ref this.Segments);
    DiploidCoverage = CanvasCommon.Utilities.Mean(diploidCounts);
    CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;

    // new coverage model
    this.Model = new CoverageModel();
    Model.DiploidCoverage = DiploidCoverage;

    // Few segments => weight by bin count, many segments => weight by genomic length.
    bool weightByLength = this.Segments.Count > 100;
    List<SegmentInfo> segmentInfos = new List<SegmentInfo>();
    foreach (CanvasSegment segment in this.Segments)
    {
        // Fold frequencies above 0.5 onto [0, 0.5] so they represent the minor allele.
        List<double> foldedFrequencies = new List<double>();
        foreach (float frequency in segment.VariantFrequencies)
        {
            foldedFrequencies.Add(frequency > 0.5 ? 1 - frequency : frequency);
        }

        SegmentInfo info = new SegmentInfo();
        info.Segment = segment;

        // -1 marks "no variant frequencies available".
        if (foldedFrequencies.Count == 0)
        {
            info.MAF = -1;
        }
        else
        {
            info.MAF = CanvasCommon.Utilities.Median(foldedFrequencies);
        }

        info.Coverage = CanvasCommon.Utilities.Median(segment.Counts);
        if (weightByLength)
        {
            info.Weight = segment.End - segment.Begin;
        }
        else
        {
            info.Weight = segment.BinCount;
        }

        segmentInfos.Add(info);
    }

    // Assign copy number and major chromosome count for each segment
    bool useGaussianMixtureModel = false; // For now, this is set false, since we saw weird performance on chrY (CANV-115):
    if (!useGaussianMixtureModel)
    {
        AssignPloidyCallsDistance(Model, segmentInfos, medianVariantCoverage);
    }
    else
    {
        // optimize model covariance
        double likelihood = FitGaussians(Model, segmentInfos);
        AssignPloidyCallsGaussianMixture();
    }

    // Merge neighboring segments that got the same copy number call.
    CanvasSegment.MergeSegments(ref this.Segments);
    CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline);

    string coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);
    CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

    if (this.CNOracle != null)
    {
        this.GenerateReportVersusKnownCN();
    }

    // Carry the ploidy header line over into the output, when there is one.
    List<string> extraHeaders = new List<string>();
    if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
    {
        extraHeaders.Add(ploidy.HeaderLine);
    }

    CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, extraHeaders, ploidy);
    return 0;
}
/// <summary>
/// Reads segments from <paramref name="inFile"/>, assigns copy number calls, scores and merges the
/// segments, applies the segment filters and writes the calls to <paramref name="outFile"/>.
/// </summary>
/// <param name="variantFrequencyFile">Variant frequency file whose alleles are added to the segments.</param>
/// <param name="inFile">Segment input file; its directory becomes <c>TempFolder</c>.</param>
/// <param name="outFile">Output path for the segment calls.</param>
/// <param name="ploidyVcfPath">Optional ploidy VCF; skipped when null or empty.</param>
/// <param name="referenceFolder">Reference folder forwarded to the writers.</param>
/// <param name="sampleName">Sample name forwarded to the segment writer.</param>
/// <param name="truthDataPath">Optional known copy number data; enables the comparison report.</param>
/// <returns>Always 0.</returns>
public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyVcfPath, string referenceFolder, string sampleName, string truthDataPath)
{
    bool hasTruthData = !string.IsNullOrEmpty(truthDataPath);
    if (hasTruthData)
    {
        _cnOracle = new CopyNumberOracle();
        _cnOracle.LoadKnownCN(truthDataPath);
    }

    _segments = Segments.ReadSegments(_logger, new FileLocation(inFile));
    _allSegments = _segments.AllSegments.ToList();
    TempFolder = Path.GetDirectoryName(inFile);

    // Nothing to call: still write an (empty) output so downstream steps find the file.
    if (_allSegments.Count == 0)
    {
        Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
        CanvasSegmentWriter.WriteSegments(outFile, _allSegments, _model?.DiploidCoverage, referenceFolder, sampleName, null, null, QualityFilterThreshold, false, null, null);
        return 0;
    }

    PloidyInfo ploidy = string.IsNullOrEmpty(ploidyVcfPath)
        ? null
        : PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath);

    // load MAF
    var allelesByChromosome = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFile), _segments.IntervalsByChromosome);
    _segments.AddAlleles(allelesByChromosome);
    MeanCoverage = allelesByChromosome
        .SelectMany(x => x.Value)
        .SelectMany(y => y.TotalCoverage)
        .Average();
    AggregateVariantCoverage(ref _allSegments);

    // Create new models for different copy number states
    InitializePloidies();

    // Compute statistics on the copy number two regions
    float[] diploidCounts = AggregateCounts(ref _allSegments);
    _diploidCoverage = Utilities.Mean(diploidCounts);
    _coverageWeightingFactor = CoverageWeighting / _diploidCoverage;

    // new coverage model
    _model = new CoverageModel { DiploidCoverage = _diploidCoverage };

    // NOTE(review): this list is filled but not read again within this method - confirm whether
    // it can be dropped.
    bool weightByLength = _allSegments.Count > 100;
    List<SegmentInfo> segmentInfos = new List<SegmentInfo>();
    foreach (CanvasSegment segment in _allSegments)
    {
        // Fold frequencies above 0.5 onto [0, 0.5] so they represent the minor allele.
        List<double> foldedFrequencies = new List<double>();
        foreach (float frequency in segment.Balleles.Frequencies)
        {
            foldedFrequencies.Add(frequency > 0.5 ? 1 - frequency : frequency);
        }

        SegmentInfo info = new SegmentInfo { Segment = segment };

        // -1 marks "no allele frequencies available".
        if (foldedFrequencies.Count == 0)
        {
            info.Maf = -1;
        }
        else
        {
            info.Maf = Utilities.Median(foldedFrequencies);
        }

        info.Coverage = Utilities.Median(segment.Counts);
        info.Weight = weightByLength ? segment.Length : segment.BinCount;
        segmentInfos.Add(info);
    }

    AssignPloidyCallsDistance(_model);
    CanvasSegment.AssignQualityScores(_allSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);

    // Merge neighboring segments that got the same copy number call.
    // merging segments requires quality scores so we do it after quality scores have been assigned
    var mergedSegments = CanvasSegment.MergeSegments(_allSegments);

    // recalculating qscores after merging segments improves performance!
    CanvasSegment.AssignQualityScores(mergedSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);
    CanvasSegment.SetFilterForSegments(QualityFilterThreshold, mergedSegments, CanvasFilter.SegmentSizeCutoff);

    var coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);
    CanvasSegment.WriteCoveragePlotData(mergedSegments, _model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

    if (_cnOracle != null)
    {
        GenerateReportVersusKnownCopyNumber();
    }

    // Carry the ploidy header line over into the output, when there is one.
    List<string> extraHeaders = new List<string>();
    if (!string.IsNullOrEmpty(ploidy?.HeaderLine))
    {
        extraHeaders.Add(ploidy.HeaderLine);
    }

    CanvasSegmentWriter.WriteSegments(outFile, mergedSegments, _model.DiploidCoverage, referenceFolder, sampleName, extraHeaders, ploidy, QualityFilterThreshold, false, null, null);
    return 0;
}
// Exercises SegmentationResultsProcessor.PostProcessSegments on one chromosome with three raw
// segments and six bins, first without and then with forbidden zones at different positions.
public void PostProcessSegmentsTests()
{
    var processor = new SegmentationResultsProcessor(100);

    var chr1Segments = new[]
    {
        new SegmentationInput.Segment { start = 1, end = 1000 },
        new SegmentationInput.Segment { start = 1100, end = 4500 },
        new SegmentationInput.Segment { start = 4600, end = 5000 }
    };
    var segmentsByChrom = new Dictionary<string, SegmentationInput.Segment[]>
    {
        { "chr1", chr1Segments }
    };
    var segmentationResults = new GenomeSegmentationResults(segmentsByChrom);
    var ploidyInfo = new PloidyInfo();
    var excludedIntervals = new Dictionary<string, List<SampleGenomicBin>>();

    var coverageInfo = new CoverageInfo
    {
        CoverageByChr = new Dictionary<string, double[]> { { "chr1", new double[] { 10, 10, 50, 100, 25, 10 } } },
        EndByChr = new Dictionary<string, uint[]> { { "chr1", new uint[] { 500, 890, 1299, 4000, 4500, 5050 } } },
        StartByChr = new Dictionary<string, uint[]> { { "chr1", new uint[] { 100, 600, 1200, 1300, 4001, 5000 } } }
    };

    // --- No forbidden zones ---
    var chr1Results = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)["chr1"];
    Assert.Equal(3, chr1Results.Count);
    // Final segments should reflect boundaries of actual bins within them
    // (in practice, these probably shouldn't disagree? but let's go theoretical here)
    SegmentTestHelpers.CheckSegment(chr1Results[0], 100, 890, 10, 2);
    SegmentTestHelpers.CheckSegment(chr1Results[1], 1200, 4500, 50, 3);
    SegmentTestHelpers.CheckSegment(chr1Results[2], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

    // --- Forbidden zone between two bins of the same original segment: this should split up the affected segment ---
    excludedIntervals.Add("chr1", new List<SampleGenomicBin> { GetForbiddenZone("chr1", 525, 575) }); // Mid = 550, in between the bins of the first segment
    chr1Results = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)["chr1"];
    Assert.Equal(4, chr1Results.Count);
    SegmentTestHelpers.CheckSegment(chr1Results[0], 100, 500, 10, 1);
    SegmentTestHelpers.CheckSegment(chr1Results[1], 600, 890, 10, 1);
    SegmentTestHelpers.CheckSegment(chr1Results[2], 1200, 4500, 50, 3);
    SegmentTestHelpers.CheckSegment(chr1Results[3], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

    // --- Forbidden zone midpoint is in the second bin ---
    // Apparently this is presumed to never happen because it would have already been taken care of.
    // This fails the test with the Debug Asserts in there. Otherwise it would be counted as a new bin
    excludedIntervals.Clear();
    excludedIntervals.Add("chr1", new List<SampleGenomicBin> { GetForbiddenZone("chr1", 585, 635) }); // Mid = 610, in second bin
    chr1Results = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)["chr1"];
    Assert.Equal(4, chr1Results.Count);
    SegmentTestHelpers.CheckSegment(chr1Results[0], 100, 500, 10, 1);
    SegmentTestHelpers.CheckSegment(chr1Results[1], 600, 890, 10, 1);
    SegmentTestHelpers.CheckSegment(chr1Results[2], 1200, 4500, 50, 3);
    SegmentTestHelpers.CheckSegment(chr1Results[3], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

    // --- Forbidden zone midpoint is in the first bin although it ends between bins ---
    // Apparently this is presumed to never happen because it would have already been taken care of.
    // Note the asymmetry compared to the above
    excludedIntervals.Clear();
    excludedIntervals.Add("chr1", new List<SampleGenomicBin> { GetForbiddenZone("chr1", 465, 515) }); // Mid = 490, in first bin of first segment
    chr1Results = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)["chr1"];
    // Would fail - asymmetry. What do we want?
    // Leave as-is for now so as not to change the behavior in this (unrelated) feature addition
    //Assert.Equal(4, chr1Results.Count);
    //SegmentTestHelpers.CheckSegment(chr1Results[0], 100, 500, 10, 1);
    //SegmentTestHelpers.CheckSegment(chr1Results[1], 600, 890, 10, 1);
    //SegmentTestHelpers.CheckSegment(chr1Results[2], 1200, 4500, 50, 3);
    //SegmentTestHelpers.CheckSegment(chr1Results[3], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

    // TODO test where no segment covers bins?
    // TODO overlapping segments or bins?
    // TODO bin starts before segment
    // TODO test interbin dist
}