/// <summary>
/// Reads segments from <paramref name="inFile"/>, assigns a copy number call to each of them,
/// merges neighboring segments that received the same call, assigns quality scores, and writes
/// the coverage plot data plus the final segments to disk.
/// </summary>
/// <param name="variantFrequencyFile">File handed to <c>CanvasIO.LoadVariantFrequencies</c> to populate the segments' variant frequencies.</param>
/// <param name="inFile">Segment file read via <c>CanvasSegment.ReadSegments</c>; its directory is stored in <c>TempFolder</c>.</param>
/// <param name="outFile">Path passed to <c>CanvasSegment.WriteSegments</c>; also used to derive the coverage plot data path.</param>
/// <param name="ploidyBedPath">Optional ploidy BED file; skipped when null or empty.</param>
/// <param name="referenceFolder">Reference folder forwarded to the output writers.</param>
/// <param name="sampleName">Sample name forwarded to <c>CanvasSegment.WriteSegments</c>.</param>
/// <param name="truthDataPath">Optional known copy number file; when given, a <c>CopyNumberOracle</c> is loaded and a report versus the known CN is generated.</param>
/// <returns>Always 0.</returns>
public int CallVariants(string variantFrequencyFile, string inFile, string outFile, string ploidyBedPath, string referenceFolder, string sampleName, string truthDataPath)
{
    // Truth data is optional: only build the oracle when a path was supplied.
    if (!string.IsNullOrEmpty(truthDataPath))
    {
        this.CNOracle = new CopyNumberOracle();
        this.CNOracle.LoadKnownCN(truthDataPath);
    }

    this.Segments = CanvasSegment.ReadSegments(inFile);
    this.TempFolder = Path.GetDirectoryName(inFile);

    // With no segments there is nothing to call; still write an output file, then stop.
    if (this.Segments.Count == 0)
    {
        Console.WriteLine("CanvasDiploidCaller: No segments loaded; no CNV calls will be made.");
        CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, null, null);
        return 0;
    }

    // Ploidy information stays null unless a BED path was provided.
    PloidyInfo ploidy = string.IsNullOrEmpty(ploidyBedPath)
        ? null
        : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);

    // Load the variant (minor allele) frequencies onto the segments.
    this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
    int medianVariantCoverage = AggregateVariantCoverage(ref this.Segments);

    // Create new models for the different copy number states.
    this.InitializePloidies();

    // Compute statistics on the copy number two regions.
    float[] diploidCounts = AggregateCounts(ref this.Segments);
    DiploidCoverage = CanvasCommon.Utilities.Mean(diploidCounts);
    CoverageWeightingFactor = CoverageWeighting / DiploidCoverage;

    // New coverage model, seeded with the diploid coverage computed above.
    this.Model = new CoverageModel();
    Model.DiploidCoverage = DiploidCoverage;

    // The weighting scheme depends only on the total segment count, so decide it once.
    bool weightByLength = this.Segments.Count > 100;

    List<SegmentInfo> segmentInfos = new List<SegmentInfo>();
    foreach (CanvasSegment segment in this.Segments)
    {
        SegmentInfo info = new SegmentInfo();
        info.Segment = segment;

        // Fold every frequency above 0.5 to (1 - frequency) before taking the median.
        List<double> foldedFrequencies = new List<double>();
        foreach (float frequency in segment.VariantFrequencies)
        {
            if (frequency > 0.5)
            {
                foldedFrequencies.Add(1 - frequency);
            }
            else
            {
                foldedFrequencies.Add(frequency);
            }
        }

        // -1 marks a segment that has no variant frequencies at all.
        if (foldedFrequencies.Count == 0)
        {
            info.MAF = -1;
        }
        else
        {
            info.MAF = CanvasCommon.Utilities.Median(foldedFrequencies);
        }

        info.Coverage = CanvasCommon.Utilities.Median(segment.Counts);

        // Weight by genomic span when there are more than 100 segments, otherwise by bin count.
        if (weightByLength)
        {
            info.Weight = segment.End - segment.Begin;
        }
        else
        {
            info.Weight = segment.BinCount;
        }

        segmentInfos.Add(info);
    }

    // Assign copy number and major chromosome count for each segment.
    // For now, the Gaussian mixture path is switched off, since we saw weird performance on chrY (CANV-115).
    bool useGaussianMixtureModel = false;
    if (!useGaussianMixtureModel)
    {
        AssignPloidyCallsDistance(Model, segmentInfos, medianVariantCoverage);
    }
    else
    {
        // Optimize model covariance; the value returned by FitGaussians is not used here.
        FitGaussians(Model, segmentInfos);
        AssignPloidyCallsGaussianMixture();
    }

    // Merge neighboring segments that got the same copy number call, then score them.
    CanvasSegment.MergeSegments(ref this.Segments);
    CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.LogisticGermline);

    string coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outFile);
    CanvasSegment.WriteCoveragePlotData(this.Segments, Model.DiploidCoverage, ploidy, coverageOutputPath, referenceFolder);

    if (this.CNOracle != null)
    {
        this.GenerateReportVersusKnownCN();
    }

    // Carry the ploidy header line (if any) through to the segment output.
    List<string> headerLines = new List<string>();
    if (ploidy != null && !string.IsNullOrEmpty(ploidy.HeaderLine))
    {
        headerLines.Add(ploidy.HeaderLine);
    }

    CanvasSegment.WriteSegments(outFile, this.Segments, referenceFolder, sampleName, headerLines, ploidy);
    return 0;
}
/// <summary>
/// Reads segments from <paramref name="inFile"/>, loads variant frequencies, calls copy number via
/// <c>CallCNVUsingSNVFrequency</c>, assigns quality scores, merges segments, and writes the result
/// to <paramref name="outputVCFPath"/>. When calling throws <c>UncallableDataException</c>, the
/// reason is printed, the segment list is cleared, and the remaining steps still run.
/// </summary>
/// <param name="inFile">Segment file read via <c>CanvasSegment.ReadSegments</c>; its directory is stored in <c>TempFolder</c>.</param>
/// <param name="variantFrequencyFile">File handed to <c>CanvasIO.LoadVariantFrequencies</c>.</param>
/// <param name="outputVCFPath">Output path; its directory is stored in <c>OutputFolder</c> and it is also used to derive the coverage plot data path.</param>
/// <param name="referenceFolder">Reference folder forwarded to the calling step and the output writers.</param>
/// <param name="name">Name forwarded to <c>CanvasSegment.WriteSegments</c>.</param>
/// <param name="localSDmertic">
/// Forwarded unchanged to <c>CallCNVUsingSNVFrequency</c>.
/// NOTE(review): the name looks like a typo for "localSDmetric"; it is left as-is because
/// renaming a parameter would break callers that use named arguments.
/// </param>
/// <returns>Always 0.</returns>
public int CallVariants(string inFile, string variantFrequencyFile, string outputVCFPath, string referenceFolder, string name, double? localSDmertic)
{
    this.OutputFolder = Path.GetDirectoryName(outputVCFPath);
    this.TempFolder = Path.GetDirectoryName(inFile);
    Console.WriteLine("{0} CallVariants start:", DateTime.Now);
    this.Segments = CanvasSegment.ReadSegments(inFile);

    // Special logic: increase the allowed model deviation for targeted data
    // (recognized here by having fewer than 500 segments).
    if (this.Segments.Count < 500)
    {
        this.DeviationFactor = 2.0f;
    }

    // Some debugging output, for developer usage.
    if (!string.IsNullOrEmpty(this.TruthDataPath))
    {
        this.CNOracle = new CopyNumberOracle();
        this.CNOracle.LoadKnownCN(this.TruthDataPath);
    }

    if (this.CNOracle != null)
    {
        this.DebugModelCoverageByCN();
        this.DebugModelSegmentCoverageByCN();
    }

    this.MeanCoverage = CanvasIO.LoadVariantFrequencies(variantFrequencyFile, this.Segments);
    if (this.IsDbsnpVcf)
    {
        CanvasCommon.Utilities.PruneVariantFrequencies(this.Segments, this.TempFolder, ref MinimumVariantFrequenciesForInformativeSegment);
    }

    this.InitializePloidies();

    if (this.CNOracle != null)
    {
        this.DebugModelSegmentsByPloidy();
    }

    // Starts out empty so that the uncallable-data path below still has a list to extend.
    List<string> extraHeaders = new List<string>();
    try
    {
        extraHeaders = CallCNVUsingSNVFrequency(localSDmertic, referenceFolder);

        string coverageOutputPath = CanvasCommon.Utilities.GetCoverageAndVariantFrequencyOutputPath(outputVCFPath);
        CanvasSegment.WriteCoveragePlotData(this.Segments, this.Model.DiploidCoverage, this.ReferencePloidy, coverageOutputPath, referenceFolder);
    }
    catch (UncallableDataException e)
    {
        // Not fatal: report why, and drop all segments so that no calls are written.
        Console.WriteLine("Not calling any CNVs. Reason: {0}", e.Message);
        Segments.Clear();
    }

    if (this.ReferencePloidy != null && !string.IsNullOrEmpty(this.ReferencePloidy.HeaderLine))
    {
        extraHeaders.Add(this.ReferencePloidy.HeaderLine);
    }

    CanvasSegment.AssignQualityScores(this.Segments, CanvasSegment.QScoreMethod.Logistic);

    // Merge segments that got the same copy number call. Enrichment is restricted to merging
    // *neighboring* segments only, since many non-adjacent merges would jump across
    // non-manifest intervals.
    if (!this.IsEnrichment)
    {
        CanvasSegment.MergeSegmentsUsingExcludedIntervals(ref this.Segments, MinimumCallSize, ExcludedIntervals);
    }
    else
    {
        CanvasSegment.MergeSegments(ref this.Segments, MinimumCallSize, 1);
    }

    if (this.CNOracle != null)
    {
        this.DebugEvaluateCopyNumberCallAccuracy();
        this.GenerateReportVersusKnownCN();
        this.GenerateExtendedReportVersusKnownCN();
    }

    // Write out results.
    CanvasSegment.WriteSegments(outputVCFPath, this.Segments, referenceFolder, name, extraHeaders, true, this.ReferencePloidy, true);
    return 0;
}