Esempio n. 1
0
        public void TestReadSegments()
        {
            // Three tab-separated chr22 records, fed to the parser from memory instead of a file.
            const string partitionedText =
                "chr22\t1\t10\t14.00\t0\n" +
                "chr22\t10\t30\t31.00\t1\n" +
                "chr22\t30\t40\t6.00\t2\n";

            Segments parsed;
            using (var segmentReader = new GzipOrTextReader(new StringReader(partitionedText)))
            {
                parsed = Segments.ReadSegments(segmentReader);
            }

            // Every record is on chr22, so the per-chromosome view must equal the full list.
            Assert.Equal(parsed.GetSegmentsForChromosome("chr22"), parsed.AllSegments);

            // For each segment, check the start interval and then the end interval.
            // The two integers are the expected bounds handed to AssertConfidenceInterval.
            var firstSegment = parsed.AllSegments.First();
            AssertConfidenceInterval(-5, 5, firstSegment.StartConfidenceInterval);
            AssertConfidenceInterval(-5, 10, parsed.AllSegments.First().EndConfidenceInterval);

            var middleSegment = parsed.AllSegments[1];
            AssertConfidenceInterval(-5, 10, middleSegment.StartConfidenceInterval);
            AssertConfidenceInterval(-10, 5, parsed.AllSegments[1].EndConfidenceInterval);

            var lastSegment = parsed.AllSegments.Last();
            AssertConfidenceInterval(-10, 5, lastSegment.StartConfidenceInterval);
            AssertConfidenceInterval(-5, 5, parsed.AllSegments.Last().EndConfidenceInterval);
        }
Esempio n. 2
0
        /// <summary>
        /// Single-sample calling workflow: reads segments from <paramref name="inFile"/>, attaches allele
        /// data read from <paramref name="variantFrequencyFile"/>, assigns ploidy calls and quality scores,
        /// merges neighboring segments, and writes coverage plot data plus the segment output.
        /// </summary>
        /// <param name="variantFrequencyFile">Path of the variant frequency file passed to <c>CanvasIO.ReadFrequenciesWrapper</c>.</param>
        /// <param name="inFile">Path of the segment file; its directory is also stored in <c>TempFolder</c>.</param>
        /// <param name="outFile">Output path handed to <c>CanvasSegmentWriter.WriteSegments</c>; the coverage plot path is derived from it.</param>
        /// <param name="ploidyVcfPath">Optional ploidy VCF; when null or empty no ploidy information is loaded.</param>
        /// <param name="referenceFolder">Reference folder forwarded to the writers.</param>
        /// <param name="sampleName">Sample name forwarded to the segment writer.</param>
        /// <param name="truthDataPath">Optional truth data; when non-empty a <c>CopyNumberOracle</c> is loaded and a report versus known copy number is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyVcfPath, string referenceFolder, string sampleName,
                                string truthDataPath)
        {
            // Truth data is optional; the oracle is only created when a path is supplied.
            if (!string.IsNullOrEmpty(truthDataPath))
            {
                _cnOracle = new CopyNumberOracle();
                _cnOracle.LoadKnownCN(truthDataPath);
            }

            _segments    = Segments.ReadSegments(_logger, new FileLocation(inFile));
            _allSegments = _segments.AllSegments.ToList();
            TempFolder   = Path.GetDirectoryName(inFile);
            if (_allSegments.Count == 0)
            {
                // Nothing to call: write the (empty) segment list and finish.
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegmentWriter.WriteSegments(outFile, _allSegments, _model?.DiploidCoverage, referenceFolder,
                                                  sampleName, null, null, QualityFilterThreshold, false, null, null);
                return 0;
            }

            PloidyInfo ploidy = null;
            if (!string.IsNullOrEmpty(ploidyVcfPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath);
            }

            // load MAF
            var allelesByChromosome = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFile), _segments.IntervalsByChromosome);

            _segments.AddAlleles(allelesByChromosome);
            MeanCoverage = allelesByChromosome.SelectMany(x => x.Value).SelectMany(y => y.TotalCoverage).Average();
            AggregateVariantCoverage(ref _allSegments);

            // Create new models for different copy number states
            InitializePloidies();

            // Compute statistics on the copy number two regions
            float[] diploidCounts = AggregateCounts(ref _allSegments);
            _diploidCoverage         = Utilities.Mean(diploidCounts);
            _coverageWeightingFactor = CoverageWeighting / _diploidCoverage;

            // new coverage model
            _model = new CoverageModel {
                DiploidCoverage = _diploidCoverage
            };

            AssignPloidyCallsDistance(_model);

            CanvasSegment.AssignQualityScores(_allSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);

            // Merge neighboring segments that got the same copy number call.
            // merging segments requires quality scores so we do it after quality scores have been assigned
            var mergedSegments = CanvasSegment.MergeSegments(_allSegments);

            // recalculating qscores after merging segments improves performance!
            CanvasSegment.AssignQualityScores(mergedSegments, CanvasSegment.QScoreMethod.LogisticGermline, _germlineScoreParameters);
            CanvasSegment.SetFilterForSegments(QualityFilterThreshold, mergedSegments, CanvasFilter.SegmentSizeCutoff);

            var coverageOutputPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutputPath(outFile);
            CanvasSegment.WriteCoveragePlotData(mergedSegments, _model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

            if (_cnOracle != null)
            {
                GenerateReportVersusKnownCopyNumber();
            }

            // The ploidy header line, when present, is the only extra header passed to the writer.
            List<string> extraHeaders = new List<string>();
            if (!string.IsNullOrEmpty(ploidy?.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegmentWriter.WriteSegments(outFile, mergedSegments, _model.DiploidCoverage, referenceFolder, sampleName,
                                              extraHeaders, ploidy, QualityFilterThreshold, false, null, null);
            return 0;
        }
Esempio n. 3
0
        /// <summary>
        /// Multi-sample calling workflow. For each sample it loads segments and allele data and builds
        /// sample metrics and a copy number model; it then calls variants in parallel over the selected
        /// segment sets, merges and filters the results, and writes the multi-sample output followed by
        /// the per-sample outputs.
        /// </summary>
        /// <param name="variantFrequencyFiles">Variant frequency file paths, indexed in parallel with <paramref name="sampleNames"/>.</param>
        /// <param name="segmentFiles">Segment file paths, indexed in parallel with <paramref name="sampleNames"/>.</param>
        /// <param name="outVcfFile">Multi-sample VCF location; its directory receives the per-sample outputs.</param>
        /// <param name="ploidyBedPath">Ploidy path forwarded to <c>SampleMetrics.GetSampleInfo</c>.</param>
        /// <param name="referenceFolder">Reference folder forwarded to the writers.</param>
        /// <param name="sampleNames">Sample names; one iteration of the loading loop runs per entry.</param>
        /// <param name="commonCnvsBedPath">Common CNV BED path forwarded to <c>CreateSegmentSetsFromCommonCnvs</c>.</param>
        /// <param name="sampleTypes">Sample types, indexed in parallel with <paramref name="sampleNames"/>.</param>
        /// <returns>Always 0.</returns>
        internal int CallVariants(List<string> variantFrequencyFiles, List<string> segmentFiles,
                                  IFileLocation outVcfFile, string ploidyBedPath, string referenceFolder, List<string> sampleNames, string commonCnvsBedPath, List<SampleType> sampleTypes)
        {
            // Per-sample lookup tables filled by the loading loop below.
            var metricsBySample       = new SampleMap<SampleMetrics>();
            var segmentsBySample      = new SampleMap<Segments>();
            var modelsBySample        = new SampleMap<ICopyNumberModel>();
            var frequencyFileBySample = new SampleMap<string>();
            var sampleTypeBySample    = new SampleMap<SampleType>();

            // The input lists are parallel: index i of each list belongs to sampleNames[i].
            for (var index = 0; index < sampleNames.Count; index++)
            {
                var id = new SampleId(sampleNames[index]);

                var loadedSegments = Segments.ReadSegments(_logger, new FileLocation(segmentFiles[index]));
                var alleles        = CanvasIO.ReadFrequenciesWrapper(_logger, new FileLocation(variantFrequencyFiles[index]), loadedSegments.IntervalsByChromosome);
                loadedSegments.AddAlleles(alleles);
                segmentsBySample.Add(id, loadedSegments);

                var metrics = SampleMetrics.GetSampleInfo(loadedSegments.AllSegments, ploidyBedPath, _callerParameters.NumberOfTrimmedBins, id);
                var model   = _copyNumberModelFactory.CreateModel(_callerParameters.MaximumCopyNumber, metrics.MaxCoverage, metrics.MeanCoverage, metrics.MeanMafCoverage);

                metricsBySample.Add(id, metrics);
                modelsBySample.Add(id, model);
                frequencyFileBySample.Add(id, variantFrequencyFiles[index]);
                sampleTypeBySample.Add(id, sampleTypes[index]);
            }

            var commonCnvSegmentSets = CreateSegmentSetsFromCommonCnvs(frequencyFileBySample,
                                                                       _callerParameters.MinAlleleCountsThreshold, commonCnvsBedPath, segmentsBySample);
            var callingCandidates = GetHighestLikelihoodSegments(commonCnvSegmentSets, metricsBySample, modelsBySample).ToList();
            PedigreeInfo pedigreeInfo = PedigreeInfo.GetPedigreeInfo(sampleTypeBySample, _callerParameters);

            // Cap the worker count at the smaller of the machine's cores and the configured maximum.
            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, _callerParameters.MaxCoreNumber)
            };
            Parallel.ForEach(
                callingCandidates,
                parallelOptions,
                candidate => _variantCaller.CallVariant(candidate, metricsBySample, modelsBySample, pedigreeInfo));

            // Regroup the called segments into one list per sample.
            var calledBySample = new SampleMap<List<CanvasSegment>>();
            foreach (var id in metricsBySample.SampleIds)
            {
                var perSample = callingCandidates.Select(candidate => candidate[id]).ToList();
                calledBySample.Add(id, perSample);
            }

            var mergedBySample = MergeSegments(calledBySample, _callerParameters.MinimumCallSize, _qualityFilterThreshold);
            FilterExcessivelyShortSegments(mergedBySample);

            var outputDirectory = outVcfFile.Directory;

            // Coverage plot data, one file per sample.
            foreach (var id in metricsBySample.SampleIds)
            {
                var plotPath = SingleSampleCallset.GetCoverageAndVariantFrequencyOutput(outputDirectory,
                                                                                        id.ToString());
                CanvasSegment.WriteCoveragePlotData(mergedBySample[id], metricsBySample[id].MeanCoverage,
                                                    metricsBySample[id].Ploidy, plotPath, referenceFolder);
            }

            // The de novo threshold is only supplied when a full pedigree is available.
            bool hasFullPedigree = pedigreeInfo != null && pedigreeInfo.HasFullPedigree();
            int? deNovoThreshold = null;
            if (hasFullPedigree)
            {
                deNovoThreshold = (int?)_deNovoQualityFilterThreshold;
            }

            var ploidyList   = metricsBySample.Select(entry => entry.Value.Ploidy).ToList();
            var coverageList = metricsBySample.Select(entry => entry.Value.MeanCoverage).ToList();
            var nameList     = metricsBySample.SampleIds.Select(sample => sample.ToString()).ToList();

            CanvasSegmentWriter.WriteMultiSampleSegments(outVcfFile.FullName, mergedBySample, coverageList, referenceFolder, nameList,
                                                         null, ploidyList, _qualityFilterThreshold, deNovoThreshold, CanvasFilter.SegmentSizeCutoff, hasFullPedigree);

            // Per-sample VCF, coverage bigWig, copy number bedGraph and partition bedGraph.
            foreach (var id in metricsBySample.SampleIds)
            {
                var sampleVcf      = SingleSampleCallset.GetVcfOutput(outputDirectory, id.ToString());
                var metrics        = metricsBySample[id];
                var sampleSegments = mergedBySample[id];
                CanvasSegmentWriter.WriteSegments(sampleVcf.FullName, sampleSegments,
                                                  metrics.MeanCoverage, referenceFolder, id.ToString(), null,
                                                  metrics.Ploidy, _qualityFilterThreshold, hasFullPedigree, deNovoThreshold, null);

                var scratchFolder = outputDirectory.CreateSubdirectory($"VisualizationTemp{id}");
                var scaling       = NormalizationCalculator.ComputeNormalizationFactor(sampleSegments);
                var bigWigFile    = _coverageBigWigWriter.Write(sampleSegments, scratchFolder, scaling);
                if (bigWigFile != null)
                {
                    // The writer may return null; only move the file when one was produced.
                    bigWigFile.MoveTo(SingleSampleCallset.GetCoverageBigWig(outputDirectory, id.ToString()));
                }

                var copyNumberBedGraph = SingleSampleCallset.GetCopyNumberBedGraph(outputDirectory, id.ToString());
                _copyNumberBedGraphWriter.Write(sampleSegments, metrics.Ploidy, copyNumberBedGraph);

                // The partition track is written from the segments as originally loaded, not the merged calls.
                var trackHeader  = "track type=bedGraph visibility=full autoScale=on graphType=points";
                var loadedForId  = segmentsBySample[id];
                var partitionOut = SingleSampleCallset.GetPartitionBedGraph(outputDirectory, id.ToString());
                _partitionCoverageBedGraphWriter.Write(loadedForId.AllSegments, partitionOut, scaling, trackHeader);
            }

            return 0;
        }