Ejemplo n.º 1
0
        /// <summary>
        /// Post-processes the segmentation results for a single input and writes the
        /// resulting segments to the given partition output file.
        /// </summary>
        /// <param name="segmentationInput">Input that performs both the post-processing and the writing.</param>
        /// <param name="outPartitionedFile">Path of the partition file to write.</param>
        /// <param name="referencePloidy">Reference ploidy forwarded to post-processing.</param>
        /// <param name="segmentationResults">Segmentation results to post-process.</param>
        private static void PostProcessAndWriteResults(SegmentationInput segmentationInput, string outPartitionedFile,
                                                       PloidyInfo referencePloidy, GenomeSegmentationResults segmentationResults)
        {
            segmentationInput.WriteCanvasPartitionResults(
                outPartitionedFile,
                segmentationInput.PostProcessSegments(segmentationResults, referencePloidy));
        }
        // An empty segment list must produce no bedgraph entries.
        public void NoSegments_ReturnsNoBedGraphEntries()
        {
            // Arrange
            var bedGraphCalculator = new CopyNumberBedGraphCalculator();
            var noSegments = Enumerable.Empty<CanvasSegment>().ToList();
            var emptyPloidy = new PloidyInfo();

            // Act
            var entries = bedGraphCalculator.Calculate(noSegments, emptyPloidy);

            // Assert
            Assert.Empty(entries);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Stores the supplied per-sample coverage statistics and ploidy without modification.
 /// </summary>
 private SampleMetrics(double meanCoverage, double meanMafCoverage, double variance,
                       double mafVariance, int maxCoverage, PloidyInfo ploidy)
 {
     // Coverage statistics.
     this.MeanCoverage = meanCoverage;
     this.MeanMafCoverage = meanMafCoverage;

     // Dispersion statistics.
     this.Variance = variance;
     this.MafVariance = mafVariance;

     // Upper coverage bound and reference ploidy.
     this.MaxCoverage = maxCoverage;
     this.Ploidy = ploidy;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Returns the reference copy number for the interval <paramref name="chrom"/>:<paramref name="start"/>-<paramref name="end"/>.
        /// </summary>
        /// <param name="referencePloidy">Reference ploidy lookup; when null, <paramref name="defaultPloidy"/> is returned.</param>
        /// <param name="chrom">Chromosome name of the interval.</param>
        /// <param name="start">Interval start, passed unchanged to the <c>CanvasSegment</c> constructor.</param>
        /// <param name="end">Interval end, passed unchanged to the <c>CanvasSegment</c> constructor.</param>
        /// <param name="defaultPloidy">Value returned when no reference ploidy is available.</param>
        public static int GetPloidy(PloidyInfo referencePloidy, string chrom, int start, int end, int defaultPloidy = 2)
        {
            // The lookup API works on segments, so wrap the interval in a segment without counts.
            return referencePloidy == null
                ? defaultPloidy
                : referencePloidy.GetReferenceCopyNumber(new CanvasSegment(chrom, start, end, new List<float>()));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Converts bin ratios to counts and writes them to <paramref name="outputPath"/>.
        /// </summary>
        /// <param name="ratios">Bins whose counts hold ratios.</param>
        /// <param name="referencePloidyBedFile">Optional reference ploidy BED file; ignored when null or not existing.</param>
        /// <param name="outputPath">Location of the text file to write.</param>
        public static void RatiosToCounts(IEnumerable <SampleGenomicBin> ratios, IFileLocation referencePloidyBedFile,
                                          IFileLocation outputPath)
        {
            bool hasPloidyBed = referencePloidyBedFile != null && referencePloidyBedFile.Exists;

            // Without a usable BED file the conversion runs with a null reference ploidy.
            PloidyInfo referencePloidy = hasPloidyBed
                ? PloidyInfo.LoadPloidyFromBedFile(referencePloidyBedFile.FullName)
                : null;

            CanvasIO.WriteToTextFile(outputPath.FullName, RatiosToCounts(ratios, referencePloidy));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Post-processes segmentation results for this input, loading the forbidden-interval
        /// BED file first when a path is configured.
        /// </summary>
        /// <param name="segmentationResults">Segmentation results to post-process.</param>
        /// <param name="referencePloidy">Reference ploidy forwarded to the processor.</param>
        /// <returns>The value returned by the processor's <c>PostProcessSegments</c>.</returns>
        public Dictionary <string, List <SegmentWithBins> > PostProcessSegments(
            GenomeSegmentationResults segmentationResults,
            PloidyInfo referencePloidy)
        {
            // No configured BED path means an empty exclusion map.
            Dictionary<string, List<SampleGenomicBin>> excludedIntervals =
                string.IsNullOrEmpty(ForbiddenIntervalBedPath)
                    ? new Dictionary<string, List<SampleGenomicBin>>()
                    : CanvasCommon.Utilities.LoadBedFile(ForbiddenIntervalBedPath);

            return _processor.PostProcessSegments(segmentationResults, referencePloidy, excludedIntervals,
                                                  CoverageInfo);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Computes summary coverage metrics for a sample from its segments.
        /// </summary>
        /// <param name="segments">Segments of the sample.</param>
        /// <param name="ploidyBedPath">
        /// Path of the ploidy file. Despite the parameter name, the file is loaded with
        /// <c>PloidyInfo.LoadPloidyFromVcfFile</c>. A null, empty or non-existing path
        /// yields a default <c>PloidyInfo</c>.
        /// </param>
        /// <param name="numberOfTrimmedBins">Forwarded to <c>TruncatedMedianCount</c> for every segment.</param>
        /// <param name="id">Sample id; its string form selects the sample in the ploidy file.</param>
        /// <returns>The metrics for the sample.</returns>
        public static SampleMetrics GetSampleInfo(IReadOnlyList <CanvasSegment> segments, string ploidyBedPath, int numberOfTrimmedBins, SampleId id)
        {
            // Note: the "mean" coverage values are obtained through Median() on the sorted values.
            double meanMafCoverage = new SortedList <int>(segments.SelectMany(x => x.Balleles.TotalCoverage)).Median();

            // Truncated medians feed both the variance and the maximum; compute them once.
            var truncatedMedians = segments.Select(x => x.TruncatedMedianCount(numberOfTrimmedBins)).ToList();
            double variance      = Utilities.Variance(truncatedMedians);
            double mafVariance   = Utilities.Variance(segments.Where(x => x.Balleles.TotalCoverage.Count > 0)
                                                      .Select(x => x.Balleles.TotalCoverage.Average()).ToList());
            double meanCoverage = new SortedList <float>(segments.SelectMany(x => x.Counts)).Median();

            // Convert.ToInt32 rather than ToInt16: the target is an int, and the 16-bit conversion
            // throws OverflowException as soon as the coverage exceeds 32767.
            int maxCoverage = Convert.ToInt32(truncatedMedians.Max()) + 10;

            var ploidy = new PloidyInfo();
            if (!ploidyBedPath.IsNullOrEmpty() && File.Exists(ploidyBedPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromVcfFile(ploidyBedPath, id.ToString());
            }
            return(new SampleMetrics(meanCoverage, meanMafCoverage, variance, mafVariance, maxCoverage, ploidy));
        }
        /// <summary>
        /// Decides whether the bin [<paramref name="start"/>, <paramref name="end"/>] opens a new segment.
        /// A new segment starts when any of the following holds: the bin start is a recorded
        /// breakpoint, the midpoint of the current excluded interval lies between the previous
        /// bin end and this bin end, the gap to the previous bin exceeds the maximum inter-bin
        /// distance, or the reference ploidy is not uniform between the two bin ends.
        /// </summary>
        /// <param name="starts">Breakpoint lookup keyed by "chr:start".</param>
        /// <param name="chr">Chromosome of the bin.</param>
        /// <param name="excludeIntervals">Excluded intervals for this chromosome, or null.</param>
        /// <param name="previousBinEnd">End of the previous bin; 0 is treated as "no previous bin".</param>
        /// <param name="end">End of the current bin.</param>
        /// <param name="start">Start of the current bin.</param>
        /// <param name="excludeIndex">Cursor into <paramref name="excludeIntervals"/>; advanced by this call.</param>
        /// <param name="referencePloidy">Reference ploidy, or null to skip the ploidy check.</param>
        private bool IsNewSegment(Dictionary <string, bool> starts, string chr, List <SampleGenomicBin> excludeIntervals, uint previousBinEnd,
                                  uint end, uint start, ref int excludeIndex, PloidyInfo referencePloidy)
        {
            bool isBreakpoint = starts.ContainsKey(chr + ":" + start);

            // The exclusion cursor must be advanced on every call, so this runs before any return.
            if (excludeIntervals != null)
            {
                // Skip excluded intervals that stop before the end of the previous bin.
                while (excludeIndex < excludeIntervals.Count && excludeIntervals[excludeIndex].Stop < previousBinEnd)
                {
                    excludeIndex++;
                }

                if (excludeIndex < excludeIntervals.Count)
                {
                    // Note: forbiddenZoneMid should never fall inside a bin, because these intervals were
                    // already excluded from consideration during the call to CanvasBin.
                    var currentExclusion = excludeIntervals[excludeIndex];
                    int forbiddenZoneMid = (currentExclusion.Start + currentExclusion.Stop) / 2;
                    if (previousBinEnd < forbiddenZoneMid && end >= forbiddenZoneMid)
                    {
                        isBreakpoint = true;
                    }
                }
            }

            if (isBreakpoint)
            {
                return true;
            }

            // Break when the gap to the previous bin is larger than allowed (a negative limit disables this).
            bool gapTooLarge = previousBinEnd > 0 && _maxInterBinDistInSegment >= 0 &&
                               previousBinEnd + _maxInterBinDistInSegment < start;
            if (gapTooLarge)
            {
                return true;
            }

            if (referencePloidy == null)
            {
                return false;
            }

            // Also start a new segment if the reference ploidy changes between the end of the last bin
            // and the end of this one. Interval takes 1-based positions, so using previousBinEnd
            // effectively includes the last base of the previous bin, allowing for a change at the
            // bin boundary.
            var refIval = new ReferenceInterval(chr, new Interval(previousBinEnd > 0 ? previousBinEnd : 1, end));
            return !referencePloidy.IsUniformReferencePloidy(refIval);
        }
        /// <summary>
        /// Returns true for a passing segment that is a variant: either its copy number differs
        /// from the reference copy number, or it has copy number 2 with a major chromosome
        /// count of 2 (LOH).
        /// </summary>
        /// <param name="segment">Segment to classify.</param>
        /// <param name="ploidyInfo">Reference ploidy; when null the reference copy number defaults to 2.</param>
        private bool IsPassVariant(CanvasSegment segment, PloidyInfo ploidyInfo)
        {
            if (!segment.Filter.IsPass)
            {
                return false;
            }

            var expectedCopyNumber = ploidyInfo?.GetReferenceCopyNumber(segment) ?? 2;
            bool isCopyNumberChange = segment.CopyNumber != expectedCopyNumber;

            // A copy-number change, or copy-neutral LOH.
            return isCopyNumberChange || (segment.CopyNumber == 2 && segment.MajorChromosomeCount == 2);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Writes a binned ratio file where column 4 of every normal bin is replaced by the
        /// ploidy- and library-size-normalized tumor/normal count ratio.
        /// </summary>
        /// <param name="tumorBinnedPath">Gzipped, tab-separated binned counts of the tumor sample.</param>
        /// <param name="normalBinnedPath">Gzipped, tab-separated binned counts of the normal sample.</param>
        /// <param name="ratioBinnedPath">Gzipped output path for the ratios.</param>
        /// <param name="ploidyBedPath">Optional reference ploidy BED file; null or empty means no reference ploidy.</param>
        /// <param name="manifest">Optional manifest forwarded to <c>BinCounts</c>.</param>
        /// <exception cref="System.IO.InvalidDataException">
        /// The tumor file has fewer lines than the normal file.
        /// </exception>
        private static void GetBinRatio(string tumorBinnedPath, string normalBinnedPath, string ratioBinnedPath,
                                        string ploidyBedPath, NexteraManifest manifest = null)
        {
            PloidyInfo referencePloidy = String.IsNullOrEmpty(ploidyBedPath) ? null : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
            double     tumorMedian     = (new BinCounts(tumorBinnedPath, manifest: manifest)).OnTargetMedianBinCount;
            double     normalMedian    = (new BinCounts(normalBinnedPath, manifest: manifest)).OnTargetMedianBinCount;

            // Scale tumor counts to the normal library size; fall back to 1 when either median is not positive.
            double librarySizeFactor = (tumorMedian > 0 && normalMedian > 0) ? normalMedian / tumorMedian : 1;

            using (GzipReader tumorReader = new GzipReader(tumorBinnedPath))
            using (GzipReader normalReader = new GzipReader(normalBinnedPath))
            using (GzipWriter writer = new GzipWriter(ratioBinnedPath))
            {
                string normalLine;
                while ((normalLine = normalReader.ReadLine()) != null)
                {
                    // The two files are read in lock-step, one bin per line.
                    string tumorLine = tumorReader.ReadLine();
                    if (tumorLine == null)
                    {
                        // Previously this surfaced as a NullReferenceException on tumorLine.Split.
                        throw new System.IO.InvalidDataException(
                            $"Tumor binned file '{tumorBinnedPath}' has fewer lines than normal binned file '{normalBinnedPath}'.");
                    }

                    string[] normalToks  = normalLine.Split('\t');
                    string[] tumorToks   = tumorLine.Split('\t');
                    double   normalCount = double.Parse(normalToks[3]);
                    double   tumorCount  = double.Parse(tumorToks[3]);

                    // The weighted average count of a bin could be less than 1.
                    // Using these small counts for coverage normalization creates large ratios.
                    // It would be better to just drop these bins so we don't introduce too much noise
                    // into segmentation and CNV calling.
                    if (normalCount < 1)
                    {
                        continue; // skip the bin
                    }

                    string chrom = normalToks[0];
                    int    start = int.Parse(normalToks[1]);
                    int    end   = int.Parse(normalToks[2]);

                    // Scale by the reference ploidy of the bin relative to diploid.
                    double factor = CanvasDiploidBinRatioFactor * GetPloidy(referencePloidy, chrom, start, end) / 2.0;
                    double ratio  = tumorCount / normalCount * factor * librarySizeFactor;

                    normalToks[3] = String.Format("{0}", ratio);
                    writer.WriteLine(String.Join("\t", normalToks));
                }
            }
        }
        // A passing segment with copy number 2 and a default PloidyInfo must yield no entries.
        public void ReferenceCopyNumber_IsExcluded()
        {
            // Arrange
            var bedGraphCalculator = new CopyNumberBedGraphCalculator();
            var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
            var referenceSegment = new CanvasSegment("chr1", 0, 1, bins)
            {
                CopyNumber = 2,
                Filter = CanvasFilter.PassFilter
            };
            var segments = new List<CanvasSegment> { referenceSegment };
            var defaultPloidy = new PloidyInfo();

            // Act
            var entries = bedGraphCalculator.Calculate(segments, defaultPloidy).ToList();

            // Assert
            Assert.Empty(entries);
        }
        // A segment whose filter is not PASS must yield no entries, even with copy number 0.
        public void FiltersNonPassSegments()
        {
            // Arrange
            var bedGraphCalculator = new CopyNumberBedGraphCalculator();
            var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
            var filteredSegment = new CanvasSegment("chr1", 0, 1, bins)
            {
                CopyNumber = 0,
                Filter = CanvasFilter.Create("NonPass".Yield())
            };
            var segments = new List<CanvasSegment> { filteredSegment };
            var defaultPloidy = new PloidyInfo();

            // Act
            var entries = bedGraphCalculator.Calculate(segments, defaultPloidy);

            // Assert
            Assert.Empty(entries);
        }
        // A passing segment with copy number 2 and major chromosome count 2 (LOH) must be reported with value 2.
        public void LOH_IsIncluded()
        {
            // Arrange
            var bedGraphCalculator = new CopyNumberBedGraphCalculator();
            var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
            var lohSegment = new CanvasSegment("chr1", 0, 1, bins)
            {
                CopyNumber = 2,
                MajorChromosomeCount = 2,
                Filter = CanvasFilter.PassFilter
            };
            var segments = new List<CanvasSegment> { lohSegment };
            var defaultPloidy = new PloidyInfo();

            // Act
            var entries = bedGraphCalculator.Calculate(segments, defaultPloidy).ToList();

            // Assert
            Assert.Equal(2m, entries.First().Value);
        }
        // A passing segment with copy number 1 must be reported with its chromosome, interval and copy number.
        public void VariantCopyNumber_ReturnsCopyNumber()
        {
            // Arrange
            var bedGraphCalculator = new CopyNumberBedGraphCalculator();
            var bins = new List<SampleGenomicBin> { new SampleGenomicBin("chr1", 0, 1, 3f) };
            var lossSegment = new CanvasSegment("chr1", 0, 1, bins)
            {
                CopyNumber = 1,
                Filter = CanvasFilter.PassFilter
            };
            var segments = new List<CanvasSegment> { lossSegment };
            var defaultPloidy = new PloidyInfo();

            // Act
            var entries = bedGraphCalculator.Calculate(segments, defaultPloidy).ToList();

            // Assert
            var firstEntry = entries.First();
            Assert.Equal("chr1", firstEntry.Chromosome);
            Assert.Equal(new BedInterval(0, 1), firstEntry.Interval);
            Assert.Equal(1m, firstEntry.Value);
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Lazily converts each ratio bin into a count bin by multiplying its count with
 /// <c>CanvasDiploidBinRatioFactor</c> scaled by the bin's reference ploidy over 2.
 /// </summary>
 /// <param name="ratios">Bins whose counts hold ratios.</param>
 /// <param name="referencePloidy">Reference ploidy passed to <c>GetPloidy</c>; may be null.</param>
 /// <returns>One new bin per input bin, carrying the same coordinates and GC value.</returns>
 private static IEnumerable <SampleGenomicBin> RatiosToCounts(IEnumerable <SampleGenomicBin> ratios, PloidyInfo referencePloidy)
 {
     foreach (SampleGenomicBin ratioBin in ratios)
     {
         string chromosome = ratioBin.GenomicBin.Chromosome;

         // Reference ploidy of the bin, relative to diploid.
         var ploidy = GetPloidy(referencePloidy, chromosome, ratioBin.Start, ratioBin.Stop);
         double scale = CanvasDiploidBinRatioFactor * ploidy / 2.0;
         double scaledCount = ratioBin.Count * scale;

         yield return new SampleGenomicBin(ratioBin.GenomicBin.Chromosome, ratioBin.Start, ratioBin.Stop,
                                           ratioBin.GenomicBin.GC, (float)scaledCount);
     }
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Entry point of CanvasPartition: parses the command line, validates the input files,
        /// runs the selected segmentation method and writes one partition file per input.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        /// <returns>
        /// 0 when results were written or only the help text was shown (including when required
        /// arguments are missing); 1 when an input, filter, configuration or ploidy file does not exist.
        /// </returns>
        static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);
            List <string>    cleanedFiles        = new List <string>();
            List <string>    outPartitionedFiles = new List <string>();
            List <string>    vafFiles            = new List <string>();
            bool             needHelp            = false;
            bool             isGermline          = false;
            string           filterBedFile       = null;
            string           referenceFolder     = null;
            string           commonCNVsbedPath   = null;
            string           evennessMetricFile  = null;
            SegmentSplitUndo undoMethod          = SegmentSplitUndo.None;

            SegmentationInput.SegmentationMethod partitionMethod = SegmentationInput.SegmentationMethod.Wavelets;
            string parameterconfigPath = Path.Combine(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPartition)), "CanvasPartitionParameters.json");
            string ploidyVcfPath       = null;


            OptionSet p = new OptionSet()
            {
                { "i|infile=", "input file - usually generated by CanvasClean", v => cleanedFiles.Add(v) },
                { "v|vaffile=", "variant frequencyfiles - usually generated by CanvasSNV", v => vafFiles.Add(v) },
                { "o|outfile=", "text file to output", v => outPartitionedFiles.Add(v) },
                { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (SegmentationInput.SegmentationMethod)Enum.Parse(typeof(SegmentationInput.SegmentationMethod), v) },
                { "r|reference=", "folder that contains both genome.fa and GenomeSize.xml", v => referenceFolder = v },
                { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
                { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => filterBedFile = v },
                { "c|commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCNVsbedPath = v },
                { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
                { $"{CommandLineOptions.EvennessMetricFile}=", "output file for evenness metric (optional)", v => evennessMetricFile = v },
                { "p|ploidyVcfFile=", "vcf file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyVcfPath = v },
                // Interpolated so the help text shows the actual default path instead of the placeholder name.
                { "config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v },
                { "h|help", "show this message and exit", v => needHelp = v != null }
            };

            List <string> extraArgs = p.Parse(args);

            if (extraArgs.Any())
            {
                throw new IlluminaException($"Unknown arguments: {string.Join(",", extraArgs)}");
            }

            if (needHelp)
            {
                ShowHelp(p);
                return(0);
            }

            // Missing required arguments also just show the help text and exit with 0.
            if (!cleanedFiles.Any() || !outPartitionedFiles.Any() || referenceFolder == null)
            {
                ShowHelp(p);
                return(0);
            }

            // Report the file names that are actually missing rather than the list object itself.
            List <string> missingInputs = cleanedFiles.Where(inFile => !File.Exists(inFile)).ToList();
            if (missingInputs.Any())
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", string.Join(", ", missingInputs));
                return(1);
            }

            if (!string.IsNullOrEmpty(filterBedFile) && !File.Exists(filterBedFile))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", filterBedFile);
                return(1);
            }

            if (!File.Exists(parameterconfigPath))
            {
                Console.WriteLine($"CanvasPartition.exe: File {parameterconfigPath} does not exist! Exiting.");
                return(1);
            }

            if (!string.IsNullOrEmpty(ploidyVcfPath) && !File.Exists(ploidyVcfPath))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", ploidyVcfPath);
                return(1);
            }

            var parameterconfigFile       = new FileLocation(parameterconfigPath);
            var canvasPartitionParameters = Deserialize <CanvasPartitionParameters>(parameterconfigFile);

            ILogger logger    = new Logger(Console.Out.ToEnumerable(), Console.Error.ToEnumerable());
            var     processor = new SegmentationResultsProcessor(canvasPartitionParameters.MaxInterBinDistInSegment);

            // VAF files are only used when exactly one was supplied per input file;
            // otherwise every input is created without a VAF file.
            var segmentationInputs = vafFiles.Count > 0 && vafFiles.Count == cleanedFiles.Count ?
                                     cleanedFiles.Zip(vafFiles, (inFile, vafFile) => new SegmentationInput(inFile, vafFile, filterBedFile,
                                                                                                           referenceFolder, evennessMetricFile, logger, processor)).ToList() :
                                     cleanedFiles.Select(inFile => new SegmentationInput(inFile, null, filterBedFile,
                                                                                         referenceFolder, evennessMetricFile, logger, processor)).ToList();
            GenomeSegmentationResults segmentationResults;
            PloidyInfo referencePloidy = ploidyVcfPath != null ? PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath) : null;

            switch (partitionMethod)
            {
            default:     // use Wavelets if CBS is not selected
                Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
                var waveletsRunner = new WaveletsRunner(new WaveletsRunner.WaveletsRunnerParams(isGermline,
                                                                                                commonCNVsbedPath, madFactor:
                                                                                                canvasPartitionParameters.MadFactor,
                                                                                                thresholdLowerMaf: canvasPartitionParameters.ThresholdLowerMaf,
                                                                                                evennessScoreThreshold: canvasPartitionParameters.EvennessScoreThreshold, verbose: 2));
                // Wavelets handles exactly one input and one output; Single() throws otherwise.
                segmentationResults = new GenomeSegmentationResults(waveletsRunner.Run(segmentationInputs.Single(),
                                                                                       canvasPartitionParameters.EvennessScoreWindow));

                PostProcessAndWriteResults(segmentationInputs.Single(), outPartitionedFiles.Single(), referencePloidy, segmentationResults);
                break;

            case SegmentationInput.SegmentationMethod.CBS:
            {
                Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
                var cbsRunner = new CBSRunner(canvasPartitionParameters.MaxInterBinDistInSegment, undoMethod,
                                              canvasPartitionParameters.CBSalpha);
                var sampleSegmentations = new List <GenomeSegmentationResults>();
                foreach (var input in segmentationInputs)
                {
                    var segmentation = new GenomeSegmentationResults(cbsRunner.Run(input, verbose: 2));
                    sampleSegmentations.Add(segmentation);
                }

                // Segment each sample separately, then combine the per-sample results before writing.
                segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
                foreach (var(segmentationInput, outPartitionedFile) in segmentationInputs.Zip(outPartitionedFiles))
                {
                    PostProcessAndWriteResults(segmentationInput, outPartitionedFile, referencePloidy, segmentationResults);
                }
                break;
            }

            case SegmentationInput.SegmentationMethod.HMM:
            {
                Console.WriteLine("{0} Running HMM Partitioning", DateTime.Now);
                var  hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(cleanedFiles.Count);
                bool isPerSample = false;
                segmentationResults =
                    new GenomeSegmentationResults(hiddenMarkovModelsRunner.Run(segmentationInputs, isPerSample));
                for (int i = 0; i < segmentationInputs.Count; i++)
                {
                    PostProcessAndWriteResults(segmentationInputs[i], outPartitionedFiles[i], referencePloidy, segmentationResults);
                }
                break;
            }

            case SegmentationInput.SegmentationMethod.PerSampleHMM:
            {
                Console.WriteLine("{0} Running Per-sample HMM Partitioning", DateTime.Now);
                var  hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(1);
                var  sampleSegmentations      = new List <GenomeSegmentationResults>();
                bool isPerSample = true;
                foreach (var input in segmentationInputs)
                {
                    var segmentation =
                        new GenomeSegmentationResults(
                            hiddenMarkovModelsRunner.Run(input.Yield().ToList(), isPerSample));
                    sampleSegmentations.Add(segmentation);
                }

                segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
                foreach (var(segmentationInput, outPartitionedFile) in segmentationInputs.Zip(outPartitionedFiles))
                {
                    PostProcessAndWriteResults(segmentationInput, outPartitionedFile, referencePloidy,
                                               segmentationResults);
                }
                break;
            }
            }
            Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
            return(0);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Lazily produces one bedgraph entry for every segment accepted by <c>IsPassVariant</c>.
        /// </summary>
        /// <param name="segments">Segments to examine.</param>
        /// <param name="ploidyInfo">Reference ploidy passed to <c>IsPassVariant</c>.</param>
        public IEnumerable <BedGraphEntry> Calculate(IReadOnlyList <CanvasSegment> segments, PloidyInfo ploidyInfo)
        {
            return segments
                .Where(candidate => IsPassVariant(candidate, ploidyInfo))
                .Select(GetCopyNumberEntry);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Groups the coverage bins of every chromosome into segments. A new segment is started
        /// whenever <c>IsNewSegment</c> reports a break for a bin.
        /// </summary>
        /// <param name="segmentationResults">Segmentation results whose segment starts act as breakpoints.</param>
        /// <param name="referencePloidy">Reference ploidy forwarded to <c>IsNewSegment</c>.</param>
        /// <param name="excludedIntervals">Excluded intervals per chromosome; chromosomes without an entry have none.</param>
        /// <param name="coverageInfo">Per-chromosome bin starts, ends and coverage values.</param>
        /// <returns>The segments of every chromosome in <paramref name="coverageInfo"/>, keyed by chromosome.</returns>
        public Dictionary <string, List <SegmentWithBins> > PostProcessSegments(
            GenomeSegmentationResults segmentationResults,
            PloidyInfo referencePloidy, Dictionary <string, List <SampleGenomicBin> > excludedIntervals, CoverageInfo coverageInfo)
        {
            // Breakpoint lookup keyed by "chr:start". Segment ends were previously collected too
            // but never read, so they are no longer gathered.
            var starts = new Dictionary <string, bool>();

            foreach (string chr in segmentationResults.SegmentByChr.Keys)
            {
                var chromosomeSegments = segmentationResults.SegmentByChr[chr];
                for (int segmentIndex = 0; segmentIndex < chromosomeSegments.Length; segmentIndex++)
                {
                    starts[chr + ":" + chromosomeSegments[segmentIndex].start] = true;
                }
            }

            // Segment ids are numbered consecutively across chromosomes.
            int segmentNum = -1;

            var segmentsByChromosome = new Dictionary <string, List <SegmentWithBins> >();

            foreach (string chr in coverageInfo.StartByChr.Keys)
            {
                var chromosomeResult = new List <SegmentWithBins>();
                segmentsByChromosome.Add(chr, chromosomeResult);
                SegmentWithBins currentSegment = null;

                // Single lookup; stays null when the chromosome has no excluded intervals.
                List <SampleGenomicBin> excludeIntervals = null;
                if (excludedIntervals != null)
                {
                    excludedIntervals.TryGetValue(chr, out excludeIntervals);
                }

                var  excludeIndex   = 0; // Points to the first interval which *doesn't* end before our current position
                uint previousBinEnd = 0;

                var binStarts   = coverageInfo.StartByChr[chr];
                var binEnds     = coverageInfo.EndByChr[chr];
                var binCoverage = coverageInfo.CoverageByChr[chr];

                for (int binIndex = 0; binIndex < binStarts.Length; binIndex++)
                {
                    uint start = binStarts[binIndex];
                    uint end   = binEnds[binIndex];

                    bool newSegment = IsNewSegment(starts, chr, excludeIntervals, previousBinEnd, end, start, ref excludeIndex, referencePloidy);

                    var bin = new Bin(start, end, binCoverage[binIndex]);
                    if (newSegment)
                    {
                        segmentNum++;
                    }

                    // The first bin of a chromosome always opens a segment, even without a break;
                    // in that case the current segment number is reused unchanged.
                    if (newSegment || currentSegment == null)
                    {
                        currentSegment = new SegmentWithBins(segmentNum, bin);
                        chromosomeResult.Add(currentSegment);
                    }
                    else
                    {
                        currentSegment.AddBin(bin);
                    }

                    previousBinEnd = end;
                }
            }

            return(segmentsByChromosome);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Calculates the bedgraph entries for the given segments and writes them to <paramref name="location"/>.
        /// </summary>
        /// <param name="segments">Segments handed to the calculator.</param>
        /// <param name="ploidyInfo">Reference ploidy handed to the calculator.</param>
        /// <param name="location">Destination handed to the writer.</param>
        public void Write(IReadOnlyList <CanvasSegment> segments, PloidyInfo ploidyInfo, BgzfFile location)
        {
            _writer.Write(_calculator.Calculate(segments, ploidyInfo), location);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Loads segments and variant frequencies, assigns a copy number call to every segment,
        /// merges neighboring segments and writes the calls plus coverage plot data.
        /// </summary>
        /// <param name="variantFrequencyFile">Variant frequency file loaded onto the segments.</param>
        /// <param name="inFile">Segment input file; its directory becomes <c>TempFolder</c>.</param>
        /// <param name="outFile">Output file for the segment calls.</param>
        /// <param name="ploidyBedPath">Optional reference ploidy BED file; null or empty means no ploidy information.</param>
        /// <param name="referenceFolder">Reference folder passed on to the writers.</param>
        /// <param name="sampleName">Sample name passed on to the segment writer.</param>
        /// <param name="truthDataPath">Optional known-copy-number file; when given, a report versus known CN is generated.</param>
        /// <returns>Always 0, including when no segments were loaded.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            // Optional truth data, used only for the report near the end of this method.
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                this.CNOracle = new CopyNumberOracle();
                this.CNOracle.LoadKnownCN(truthDataPath);
            }

            this.Segments   = CanvasSegment.ReadSegments(inFile);
            this.TempFolder = Path.GetDirectoryName(inFile);
            // Nothing to call: still write the (empty) segment list to the output and stop.
            if (this.Segments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, null, null);
                return(0);
            }
            PloidyInfo ploidy = null;

            if (!string.IsNullOrEmpty(ploidyBedPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
            }

            // load MAF
            this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
            int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);


            // Create new models for different copy number states
            this.InitializePloidies();

            // Compute statistics on the copy number two regions
            float[] diploidCounts = AggregateCounts(ref this.Segments);
            DiploidCoverage         = CanvasCommon.Utilities.Mean(diploidCounts);
            CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;


            // new coverage model
            this.Model            = new CoverageModel();
            Model.DiploidCoverage = DiploidCoverage;
            List <SegmentInfo> segments = new List <SegmentInfo>();

            // Summarize every segment as (MAF, coverage, weight) for the ploidy assignment below.
            foreach (CanvasSegment segment in this.Segments)
            {
                SegmentInfo info = new SegmentInfo();
                info.Segment = segment;
                List <double> MAF = new List <double>();
                // Fold each frequency above 0.5 to 1 - value, so all values are at most 0.5.
                foreach (float value in segment.VariantFrequencies)
                {
                    MAF.Add(value > 0.5 ? 1 - value : value);
                }

                if (MAF.Count > 0)
                {
                    info.MAF = CanvasCommon.Utilities.Median(MAF);
                }
                else
                {
                    // No variant frequencies in this segment; -1 is presumably a "no MAF" sentinel (confirm in the consumers).
                    info.MAF = -1;
                }

                info.Coverage = CanvasCommon.Utilities.Median(segment.Counts);

                // Weight by End - Begin when there are more than 100 segments, otherwise by bin count.
                if (this.Segments.Count > 100)
                {
                    info.Weight = segment.End - segment.Begin;
                }
                else
                {
                    info.Weight = segment.BinCount;
                }
                segments.Add(info);
            }

            // Assign copy number and major chromosome count for each segment
            bool useGaussianMixtureModel = false; // For now, this is set false, since we saw weird performance on chrY (CANV-115):

            // With the flag hard-coded to false above, only the distance-based branch runs.
            if (useGaussianMixtureModel)
            {
                // optimize model covariance
                double likelihood = FitGaussians(Model, segments);
                AssignPloidyCallsGaussianMixture();
            }
            else
            {
                AssignPloidyCallsDistance(Model, segments, medianVariantCoverage);
            }

            // Merge neighboring segments that got the same copy number call.
            CanvasSegment.MergeSegments(ref this.Segments);
            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline);
            List <string> extraHeaders       = new List <string>();
            string        coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);

            CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (this.CNOracle != null)
            {
                this.GenerateReportVersusKnownCN();
            }

            // Pass the ploidy header line, when there is one, on to the segment writer.
            if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }
            CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, extraHeaders, ploidy);
            return(0);
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Germline copy-number calling entry point: reads segments and variant allele frequencies,
        /// assigns a copy-number call to every segment, scores/merges/filters the segments and writes
        /// the calls plus the coverage plot data.
        /// </summary>
        /// <param name="variantFrequencyFile">Path of the variant frequency file read via <c>CanvasIO.ReadFrequenciesWrapper</c>.</param>
        /// <param name="inFile">Path of the segment file; its directory becomes <c>TempFolder</c>.</param>
        /// <param name="outFile">Path the segment calls are written to; the coverage plot path is derived from it.</param>
        /// <param name="ploidyVcfPath">Optional ploidy VCF; when null or empty no reference ploidy is loaded.</param>
        /// <param name="referenceFolder">Reference folder handed through to the writers.</param>
        /// <param name="sampleName">Sample name handed through to the segment writer.</param>
        /// <param name="truthDataPath">Optional known-copy-number file; when supplied a report versus the known calls is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyVcfPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                _cnOracle = new CopyNumberOracle();
                _cnOracle.LoadKnownCN(truthDataPath);
            }

            _segments    = Segments.ReadSegments(_logger, new FileLocation(inFile));
            _allSegments = _segments.AllSegments.ToList();
            TempFolder   = Path.GetDirectoryName(inFile);

            if (_allSegments.Count == 0)
            {
                // Nothing to call: still emit an output file so downstream steps find one.
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegmentWriter.WriteSegments(outFile, _allSegments, _model?.DiploidCoverage, referenceFolder,
                                                  sampleName, null, null, QualityFilterThreshold, false, null, null);
                return 0;
            }

            PloidyInfo ploidy = null;
            if (!string.IsNullOrEmpty(ploidyVcfPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath);
            }

            // Load MAF data and attach the alleles to the segments.
            var allelesByChromosome = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFile), _segments.IntervalsByChromosome);
            _segments.AddAlleles(allelesByChromosome);

            // NOTE(review): Average() throws InvalidOperationException on an empty sequence of
            // non-nullable values - confirm the frequency file always yields at least one coverage value.
            MeanCoverage = allelesByChromosome.SelectMany(x => x.Value).SelectMany(y => y.TotalCoverage).Average();
            AggregateVariantCoverage(ref _allSegments);

            // Create new models for different copy number states
            InitializePloidies();

            // Compute statistics on the copy number two regions
            float[] diploidCounts = AggregateCounts(ref _allSegments);
            _diploidCoverage         = Utilities.Mean(diploidCounts);
            _coverageWeightingFactor = CoverageWeighting / _diploidCoverage;

            // New coverage model. (The per-segment SegmentInfo list that used to be built here was never
            // consumed - AssignPloidyCallsDistance only takes the model - so it has been removed.)
            _model = new CoverageModel {
                DiploidCoverage = _diploidCoverage
            };

            AssignPloidyCallsDistance(_model);

            CanvasSegment.AssignQualityScores(_allSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);

            // Merge neighboring segments that got the same copy number call.
            // Merging segments requires quality scores so we do it after quality scores have been assigned.
            var mergedSegments = CanvasSegment.MergeSegments(_allSegments);

            // Recalculating qscores after merging segments improves performance!
            CanvasSegment.AssignQualityScores(mergedSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);
            CanvasSegment.SetFilterForSegments(QualityFilterThreshold, mergedSegments, CanvasFilter.SegmentSizeCutoff);

            var coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);
            CanvasSegment.WriteCoveragePlotData(mergedSegments, _model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (_cnOracle != null)
            {
                GenerateReportVersusKnownCopyNumber();
            }

            // Carry the ploidy header line (if any) over into the output file.
            List<string> extraHeaders = new List<string>();
            if (!string.IsNullOrEmpty(ploidy?.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegmentWriter.WriteSegments(outFile, mergedSegments, _model.DiploidCoverage, referenceFolder, sampleName,
                                              extraHeaders, ploidy, QualityFilterThreshold, false, null, null);
            return 0;
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Exercises SegmentationResultsProcessor.PostProcessSegments on a single chromosome with three
        /// raw segments and six coverage bins, first without excluded intervals and then with a forbidden
        /// zone placed between / inside the bins of the first segment.
        /// </summary>
        public void PostProcessSegmentsTests()
        {
            const string chrom = "chr1";
            var processor = new SegmentationResultsProcessor(100);

            // Raw segmentation output: three segments on chr1.
            var segmentsByChrom = new Dictionary<string, SegmentationInput.Segment[]>
            {
                {
                    chrom, new[]
                    {
                        new SegmentationInput.Segment { start = 1, end = 1000 },
                        new SegmentationInput.Segment { start = 1100, end = 4500 },
                        new SegmentationInput.Segment { start = 4600, end = 5000 }
                    }
                }
            };
            var segmentationResults = new GenomeSegmentationResults(segmentsByChrom);

            // Six bins (start / end / coverage) on chr1.
            var coverageInfo = new CoverageInfo
            {
                CoverageByChr = new Dictionary<string, double[]> { { chrom, new double[] { 10, 10, 50, 100, 25, 10 } } },
                EndByChr      = new Dictionary<string, uint[]> { { chrom, new uint[] { 500, 890, 1299, 4000, 4500, 5050 } } },
                StartByChr    = new Dictionary<string, uint[]> { { chrom, new uint[] { 100, 600, 1200, 1300, 4001, 5000 } } }
            };

            var ploidyInfo        = new PloidyInfo();
            var excludedIntervals = new Dictionary<string, List<SampleGenomicBin>>();

            // --- Scenario 1: no excluded intervals ---
            var unfiltered = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)[chrom];

            Assert.Equal(3, unfiltered.Count);

            // Final segments should reflect boundaries of actual bins within them
            //  (in practice, these probably shouldn't disagree? but let's go theoretical here)
            SegmentTestHelpers.CheckSegment(unfiltered[0], 100, 890, 10, 2);
            SegmentTestHelpers.CheckSegment(unfiltered[1], 1200, 4500, 50, 3);
            SegmentTestHelpers.CheckSegment(unfiltered[2], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

            // --- Scenario 2: forbidden zone between two bins of the same original segment ---
            // Mid = 550, in between the bins of the first segment; this should split up the affected segment.
            excludedIntervals[chrom] = new List<SampleGenomicBin> { GetForbiddenZone(chrom, 525, 575) };
            var splitBetweenBins = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)[chrom];

            Assert.Equal(4, splitBetweenBins.Count);

            SegmentTestHelpers.CheckSegment(splitBetweenBins[0], 100, 500, 10, 1);
            SegmentTestHelpers.CheckSegment(splitBetweenBins[1], 600, 890, 10, 1);
            SegmentTestHelpers.CheckSegment(splitBetweenBins[2], 1200, 4500, 50, 3);
            SegmentTestHelpers.CheckSegment(splitBetweenBins[3], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

            // --- Scenario 3: forbidden zone midpoint falls inside the second bin ---
            // Apparently this is presumed to never happen because it would have already been taken care of.
            // This fails the test with the Debug Asserts in there. Otherwise it would be counted as a new bin.
            // Mid = 610, in second bin.
            excludedIntervals[chrom] = new List<SampleGenomicBin> { GetForbiddenZone(chrom, 585, 635) };
            var midInSecondBin = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)[chrom];

            Assert.Equal(4, midInSecondBin.Count);

            SegmentTestHelpers.CheckSegment(midInSecondBin[0], 100, 500, 10, 1);
            SegmentTestHelpers.CheckSegment(midInSecondBin[1], 600, 890, 10, 1);
            SegmentTestHelpers.CheckSegment(midInSecondBin[2], 1200, 4500, 50, 3);
            SegmentTestHelpers.CheckSegment(midInSecondBin[3], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

            // --- Scenario 4: forbidden zone midpoint falls inside the first bin although the zone ends between bins ---
            // Apparently this is presumed to never happen because it would have already been taken care of.
            // Note the asymmetry compared to scenario 3.
            // Mid = 490, in first bin of first segment.
            excludedIntervals[chrom] = new List<SampleGenomicBin> { GetForbiddenZone(chrom, 465, 515) };
            var midInFirstBin = processor.PostProcessSegments(segmentationResults, ploidyInfo, excludedIntervals, coverageInfo)[chrom];

            // Would fail - asymmetry. What do we want?
            // Leave as-is for now so as not to change the behavior in this (unrelated) feature addition
            //Assert.Equal(4, midInFirstBin.Count);

            //SegmentTestHelpers.CheckSegment(midInFirstBin[0], 100, 500, 10, 1);
            //SegmentTestHelpers.CheckSegment(midInFirstBin[1], 600, 890, 10, 1);
            //SegmentTestHelpers.CheckSegment(midInFirstBin[2], 1200, 4500, 50, 3);
            //SegmentTestHelpers.CheckSegment(midInFirstBin[3], 5000, 5050, 10, 1); // Bin extends past segment - still keep it (?)

            // TODO test where no segment covers bins?
            // TODO overlapping segments or bins?
            // TODO bin starts before segment
            // TODO test interbin dist
        }