This class represents a set of known copy number calls ("truth set"), useful for training models.
Esempio n. 1
0
        /// <summary>
        /// Reads segments from <paramref name="inFile"/>, assigns a copy number call to each of them using
        /// the distance-based model, merges and scores the calls, and writes them to <paramref name="outFile"/>.
        /// </summary>
        /// <param name="variantFrequencyFile">Path handed to <c>CanvasIO.LoadVariantFrequencies</c>.</param>
        /// <param name="inFile">Path the segments are read from; its directory becomes <c>TempFolder</c>.</param>
        /// <param name="outFile">Path the segments are written to; the coverage plot data path is derived from it.</param>
        /// <param name="ploidyBedPath">Optional ploidy BED file; ignored when null or empty.</param>
        /// <param name="referenceFolder">Reference folder forwarded to the segment and plot-data writers.</param>
        /// <param name="sampleName">Sample name forwarded to the segment writer.</param>
        /// <param name="truthDataPath">Optional known copy number file; when given, a report versus the known calls is generated.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName,
            string truthDataPath)
        {
            // Optional truth set, used at the end to report calls against known copy numbers.
            bool truthDataSupplied = !string.IsNullOrEmpty(truthDataPath);
            if (truthDataSupplied)
            {
                this.CNOracle = new CopyNumberOracle();
                this.CNOracle.LoadKnownCN(truthDataPath);
            }

            this.Segments = CanvasSegment.ReadSegments(inFile);
            this.TempFolder = Path.GetDirectoryName(inFile);

            // Nothing to call: still emit an (empty) output file so downstream steps find one.
            if (this.Segments.Count == 0)
            {
                Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
                CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, null, null);
                return 0;
            }

            PloidyInfo ploidy = null;
            if (!string.IsNullOrEmpty(ploidyBedPath))
            {
                ploidy = PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
            }

            // Load variant frequencies onto the segments.
            this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
            int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);

            // Set up the models for the different copy number states.
            this.InitializePloidies();

            // Statistics over the copy-number-two regions.
            float[] diploidCounts = AggregateCounts(ref this.Segments);
            DiploidCoverage = CanvasCommon.Utilities.Mean(diploidCounts);
            CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;

            // Fresh coverage model seeded with the diploid coverage.
            CoverageModel coverageModel = new CoverageModel();
            coverageModel.DiploidCoverage = DiploidCoverage;
            this.Model = coverageModel;

            // With more than 100 segments each one is weighted by its length, otherwise by its bin count.
            bool weightByLength = this.Segments.Count > 100;
            List<SegmentInfo> segments = new List<SegmentInfo>();
            foreach (CanvasSegment segment in this.Segments)
            {
                SegmentInfo info = new SegmentInfo();
                info.Segment = segment;

                // Fold every frequency above 0.5 onto its complement before taking the median.
                List<double> foldedFrequencies = new List<double>();
                foreach (float frequency in segment.VariantFrequencies)
                {
                    if (frequency > 0.5)
                    {
                        foldedFrequencies.Add(1 - frequency);
                    }
                    else
                    {
                        foldedFrequencies.Add(frequency);
                    }
                }

                // -1 marks a segment that has no variant frequencies.
                info.MAF = foldedFrequencies.Count > 0 ? CanvasCommon.Utilities.Median(foldedFrequencies) : -1;
                info.Coverage = CanvasCommon.Utilities.Median(segment.Counts);

                if (weightByLength)
                {
                    info.Weight = segment.End - segment.Begin;
                }
                else
                {
                    info.Weight = segment.BinCount;
                }

                segments.Add(info);
            }

            // Assign copy number and major chromosome count for each segment.
            // The Gaussian mixture path stays disabled for now because of weird performance on chrY (CANV-115).
            bool useGaussianMixtureModel = false;
            if (!useGaussianMixtureModel)
            {
                AssignPloidyCallsDistance(Model, segments, medianVariantCoverage);
            }
            else
            {
                // Optimize the model covariance, then call from the fitted mixture.
                FitGaussians(Model, segments);
                AssignPloidyCallsGaussianMixture();
            }

            // Merge neighboring segments that received the same copy number call, then score them.
            CanvasSegment.MergeSegments(ref this.Segments);
            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline);

            string plotDataPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);
            CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, plotDataPath, referenceFolder);

            if (this.CNOracle != null)
            {
                this.GenerateReportVersusKnownCN();
            }

            // Carry the ploidy header line, if any, into the output.
            List<string> extraHeaders = new List<string>();
            if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
            {
                extraHeaders.Add(ploidy.HeaderLine);
            }

            CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, extraHeaders, ploidy);
            return 0;
        }
Esempio n. 2
0
        /// <summary>
        /// Reads segments from <paramref name="inFile"/>, calls copy number using SNV frequencies,
        /// scores and merges the calls, and writes them to <paramref name="outputVCFPath"/>.
        /// </summary>
        /// <param name="inFile">Path the segments are read from; its directory becomes <c>TempFolder</c>.</param>
        /// <param name="variantFrequencyFile">Path handed to <c>CanvasIO.LoadVariantFrequencies</c>.</param>
        /// <param name="outputVCFPath">Output path; its directory becomes <c>OutputFolder</c> and the coverage plot data path is derived from it.</param>
        /// <param name="referenceFolder">Reference folder forwarded to the caller and the writers.</param>
        /// <param name="name">Name forwarded to the segment writer.</param>
        /// <param name="localSDmertic">Optional value forwarded unchanged to <c>CallCNVUsingSNVFrequency</c>.</param>
        /// <returns>Always 0.</returns>
        public int CallVariants(string inFile, string variantFrequencyFile, string outputVCFPath, string referenceFolder, string name, double? localSDmertic)
        {
            this.OutputFolder = Path.GetDirectoryName(outputVCFPath);
            this.TempFolder = Path.GetDirectoryName(inFile);
            Console.WriteLine("{0} CallVariants start:", DateTime.Now);
            this.Segments = CanvasSegment.ReadSegments(inFile);

            // Fewer than 500 segments is treated as targeted data: allow a larger model deviation.
            if (this.Segments.Count < 500)
            {
                this.DeviationFactor = 2.0f;
            }

            // Developer-facing debugging output, available only when truth data is present.
            if (!string.IsNullOrEmpty(this.TruthDataPath))
            {
                this.CNOracle = new CopyNumberOracle();
                this.CNOracle.LoadKnownCN(this.TruthDataPath);
            }

            if (this.CNOracle != null)
            {
                this.DebugModelCoverageByCN();
                this.DebugModelSegmentCoverageByCN();
            }

            this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
            if (this.IsDbsnpVcf)
            {
                CanvasCommon.Utilities.PruneVariantFrequencies(this.Segments, this.TempFolder, ref MinimumVariantFrequenciesForInformativeSegment);
            }

            this.InitializePloidies();

            if (this.CNOracle != null)
            {
                this.DebugModelSegmentsByPloidy();
            }

            // Starts out empty so that header handling below still works when calling is abandoned.
            List<string> extraHeaders = new List<string>();
            try
            {
                extraHeaders = CallCNVUsingSNVFrequency(localSDmertic, referenceFolder);
                string plotDataPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outputVCFPath);
                CanvasSegment.WriteCoveragePlotData(this.Segments, this.Model.DiploidCoverage, this.ReferencePloidy, plotDataPath, referenceFolder);
            }
            catch (UncallableDataException ex)
            {
                // Uncallable data is not fatal: drop all segments and carry on to write an empty result.
                Console.WriteLine("Not calling any CNVs. Reason: {0}", ex.Message);
                Segments.Clear();
            }

            if (this.ReferencePloidy != null)
            {
                if (!string.IsNullOrEmpty(this.ReferencePloidy.HeaderLine))
                {
                    extraHeaders.Add(this.ReferencePloidy.HeaderLine);
                }
            }

            CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.Logistic);

            // Merge *neighboring* segments that got the same copy number call.
            // Enrichment must not merge non-adjacent segments, because many such merges would
            // jump across non-manifest intervals.
            if (!this.IsEnrichment)
            {
                CanvasSegment.MergeSegmentsUsingExcludedIntervals(ref this.Segments, MinimumCallSize, ExcludedIntervals);
            }
            else
            {
                CanvasSegment.MergeSegments(ref this.Segments, MinimumCallSize, 1);
            }

            // Evaluation reports against the known copy numbers.
            if (this.CNOracle != null)
            {
                this.DebugEvaluateCopyNumberCallAccuracy();
                this.GenerateReportVersusKnownCN();
                this.GenerateExtendedReportVersusKnownCN();
            }

            // Write out results.
            CanvasSegment.WriteSegments(outputVCFPath, this.Segments, referenceFolder, name, extraHeaders, true, this.ReferencePloidy, true);

            return 0;
        }