/// <summary>
/// Perform de-novo CNV calling in two steps:
/// 1. Filter REF variants and common CNVs, this step relies only on total CN calls with associated shortcomings
/// 2. Assign de-novo quality based on joint likelihood across pedigree using marginalisation operations
/// </summary>
/// <param name="canvasSegments">Per-sample segments under evaluation; the DqScore of every proband segment that passes the filters is set.</param>
/// <param name="samplesInfo">Per-sample metrics forwarded to the reference / shared-CNV checks and to the quality score calculation.</param>
/// <param name="parentIDs">Sample ids of the parents.</param>
/// <param name="offspringIDs">Sample ids of the offspring; each one is considered in turn as the proband.</param>
/// <param name="copyNumbersLikelihoods">Joint likelihoods forwarded to GetConditionalDeNovoQualityScore.</param>
private void SetDenovoQualityScores(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    List<SampleId> parentIDs, List<SampleId> offspringIDs, JointLikelihoods copyNumbersLikelihoods)
{
    foreach (var probandId in offspringIDs)
    {
        // targeted proband is REF
        if (IsReferenceVariant(canvasSegments, samplesInfo, probandId))
        {
            continue;
        }

        // common variant
        if (CanvasPedigreeCaller.IsSharedCnv(canvasSegments, samplesInfo, parentIDs, probandId,
            _callerParameters.MaximumCopyNumber))
        {
            continue;
        }

        // other offsprings are ALT
        if (!offspringIDs.Except(probandId.ToEnumerable())
            .All(id => IsReferenceVariant(canvasSegments, samplesInfo, id)))
        {
            continue;
        }

        // not all q-scores are above the threshold
        if (parentIDs.Concat(probandId).Any(id => !IsPassVariant(canvasSegments, id)))
        {
            continue;
        }

        double deNovoQualityScore = CanvasPedigreeCaller.GetConditionalDeNovoQualityScore(canvasSegments,
            copyNumbersLikelihoods, samplesInfo, parentIDs, probandId);

        // adjustment so that denovo quality score threshold is 20 (rather than 10) to match Manta
        deNovoQualityScore *= 2;

        // Clamp to the configured maximum. Both infinities are mapped to the maximum, exactly as before;
        // the short-circuit || replaces the bitwise | that was applied to two bool operands.
        if (Double.IsInfinity(deNovoQualityScore) || deNovoQualityScore > _callerParameters.MaxQscore)
        {
            deNovoQualityScore = _callerParameters.MaxQscore;
        }

        canvasSegments[probandId].DqScore = deNovoQualityScore;
    }
}
/// <summary>
/// Derives copy number calls for the supplied segments from the single-sample likelihoods,
/// estimates the q-scores and, when allele count information is usable, runs the Mcc
/// assignment for the full pedigree and for the remaining ("other") samples.
/// </summary>
/// <param name="canvasSegments">Per-sample segments being called.</param>
/// <param name="samplesInfo">Per-sample metrics passed to the likelihood and q-score calculations.</param>
/// <param name="copyNumberModel">Per-sample copy number models.</param>
/// <param name="pedigreeInfo">Pedigree description used to separate pedigree members from other samples.</param>
public void CallVariant(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    ISampleMap<ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
{
    var likelihoodsPerSample = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(
        canvasSegments, samplesInfo, copyNumberModel, _callerParameters.NumberOfTrimmedBins);

    var (familyCopyNumbers, familyLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, likelihoodsPerSample);
    var unrelatedCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(
        canvasSegments, pedigreeInfo, likelihoodsPerSample);
    var orderedCopyNumbers = familyCopyNumbers
        .Concat(unrelatedCopyNumbers)
        .OrderBy(canvasSegments.SampleIds);

    EstimateQScores(canvasSegments, samplesInfo, pedigreeInfo, likelihoodsPerSample, familyLikelihoods,
        orderedCopyNumbers);

    // TODO: this will be integrated with GetCopyNumbers* on a model level as a part of https://jira.illumina.com/browse/CANV-404
    // The allele-count check is deliberately evaluated once per branch, in the same order as before.
    if (CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments,
        _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment))
    {
        if (pedigreeInfo.HasFullPedigree())
        {
            AssignMccWithPedigreeInfo(canvasSegments, copyNumberModel, pedigreeInfo);
        }
    }

    if (CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments,
        _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment))
    {
        if (pedigreeInfo.HasOther())
        {
            // restrict the segments to the samples that are not part of the pedigree
            var otherSampleSegments = canvasSegments
                .Where(entry => pedigreeInfo.OtherIds.Contains(entry.SampleId))
                .ToSampleMap();
            AssignMccNoPedigreeInfo(otherSampleSegments, copyNumberModel, _genotypes);
        }
    }
}
/// <summary>
/// Calls copy numbers for the supplied segments. Coverage likelihoods are always computed; when
/// allele count information is usable they are joined with genotype log-likelihoods, otherwise
/// they are converted to log-likelihoods on their own. The resulting single-sample likelihoods
/// drive the pedigree / non-pedigree copy number selection and the final CN and score assignment.
/// </summary>
/// <param name="canvasSegments">Per-sample segments being called.</param>
/// <param name="samplesInfo">Per-sample metrics passed to the likelihood calculation and score assignment.</param>
/// <param name="copyNumberModel">Per-sample copy number models.</param>
/// <param name="pedigreeInfo">Pedigree description used to separate pedigree members from other samples.</param>
public void CallVariant(ISampleMap<CanvasSegment> canvasSegments, ISampleMap<SampleMetrics> samplesInfo,
    ISampleMap<ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
{
    var likelihoodsFromCoverage = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(
        canvasSegments, samplesInfo, copyNumberModel);

    // Number of B-alleles is taken from the first sample's segment; it is read unconditionally,
    // before the decision whether allele information is going to be used.
    int nBalleles = canvasSegments.Values.First().Balleles.Size();

    // Allele information is only used when the segment has enough SNPs (count and properties above threshold).
    bool alleleCountsUsable = CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments,
        _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment);

    // With allele information: merge genotype log-likelihoods with the coverage likelihoods.
    // Without it: fall back to the coverage likelihoods alone, converted to log space.
    var singleSampleLikelihoods = alleleCountsUsable
        ? JoinLikelihoods(
            GetGenotypeLogLikelihoods(canvasSegments, copyNumberModel, _PhasedGenotypes),
            likelihoodsFromCoverage,
            nBalleles)
        : ConvertToLogLikelihood(likelihoodsFromCoverage);

    // Joint likelihood across pedigree samples, based on coverage only or coverage + allele counts.
    var (familyCopyNumbers, familyLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, singleSampleLikelihoods);
    var unrelatedCopyNumbers = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(
        canvasSegments, pedigreeInfo, singleSampleLikelihoods);
    var orderedCopyNumbers = unrelatedCopyNumbers
        .Concat(familyCopyNumbers)
        .OrderBy(canvasSegments.SampleIds);

    AssignCNandScores(canvasSegments, samplesInfo, pedigreeInfo, singleSampleLikelihoods, familyLikelihoods,
        orderedCopyNumbers);
}
/// <summary>
/// Command-line entry point: parses the options, validates the input files, loads the caller
/// parameters and runs CNV calling with or without pedigree information.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 after printing the help text (explicit help request, unrecognised argument or missing
/// required option), 1 when a required input file does not exist, otherwise the value returned
/// by the caller.
/// </returns>
static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);

    string outDir = null;
    var segmentFiles = new List<string>();
    var variantFrequencyFiles = new List<string>();
    string ploidyBedPath = null;
    string pedigreeFile = null;
    string referenceFolder = null;
    var sampleNames = new List<string>();
    bool needHelp = false;
    int? qScoreThreshold = null;
    int? dqScoreThreshold = null;
    string parameterconfigPath = Path.Combine(Utilities.GetAssemblyFolder(typeof(Program)), "PedigreeCallerParameters.json");

    var caller = new CanvasPedigreeCaller();

    var p = new OptionSet()
    {
        { "i|infile=", "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)", v => segmentFiles.Add(v) },
        { "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)", v => variantFrequencyFiles.Add(v) },
        { "o|outdir=", "name of output directory", v => outDir = v },
        { "r|reference=", "reference genome folder that contains GenomeSize.xml", v => referenceFolder = v },
        { "n|sampleName=", "sample name for output VCF header (optional)", v => sampleNames.Add(v) },
        { "f|pedigree=", "relationship withoin pedigree (parents/proband)", v => pedigreeFile = v },
        { "p|ploidyBed=", "bed file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyBedPath = v },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        { "q|qscore=", $"quality filter threshold (default {caller.QualityFilterThreshold})", v => qScoreThreshold = int.Parse(v) },
        { "d|dqscore=", $"de novo quality filter threshold (default {caller.DeNovoQualityFilterThreshold})", v => dqScoreThreshold = int.Parse(v) },
        { "c|config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v }
    };

    var extraArgs = p.Parse(args);
    if (extraArgs.Count > 0)
    {
        Console.WriteLine("* Error: I don't understand the argument '{0}'", extraArgs[0]);
        needHelp = true;
    }

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    // segment files, variant frequency files, reference folder and output directory are mandatory
    if (!segmentFiles.Any() || !variantFrequencyFiles.Any() || string.IsNullOrEmpty(referenceFolder) || string.IsNullOrEmpty(outDir))
    {
        ShowHelp(p);
        return 0;
    }

    foreach (string segmentFile in segmentFiles)
    {
        if (File.Exists(segmentFile))
        {
            continue;
        }
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {segmentFile} does not exist! Exiting.");
        return 1;
    }

    foreach (string variantFrequencyFile in variantFrequencyFiles)
    {
        if (File.Exists(variantFrequencyFile))
        {
            continue;
        }
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {variantFrequencyFile} does not exist! Exiting.");
        return 1;
    }

    if (!File.Exists(Path.Combine(referenceFolder, "GenomeSize.xml")))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {Path.Combine(referenceFolder, "GenomeSize.xml")} does not exist! Exiting.");
        return 1;
    }

    if (!File.Exists(parameterconfigPath))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {parameterconfigPath} does not exist! Exiting.");
        return 1;
    }

    var parameterconfigFile = new FileLocation(parameterconfigPath);
    caller.CallerParameters = Deserialize<PedigreeCallerParameters>(parameterconfigFile);

    // Apply user-supplied thresholds BEFORE choosing the calling mode. Previously this happened only
    // after the no-pedigree early return, so -q/-d were silently ignored when no pedigree file was given.
    // Values outside the open interval (0, 60) are ignored and the caller's defaults stay in effect.
    if (qScoreThreshold.HasValue && qScoreThreshold > 0 && qScoreThreshold < 60)
    {
        caller.QualityFilterThreshold = qScoreThreshold.Value;
        Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied quality score threshold {qScoreThreshold}.");
    }

    if (dqScoreThreshold.HasValue && dqScoreThreshold > 0 && dqScoreThreshold < 60)
    {
        caller.DeNovoQualityFilterThreshold = dqScoreThreshold.Value;
        // report the de novo threshold itself (the message used to print the quality score threshold)
        Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied de novo quality score threshold {dqScoreThreshold}.");
    }

    if (pedigreeFile.IsNullOrEmpty())
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: pedigreeFile option is not used! Calling CNV variants without family information.");
        return caller.CallVariants(variantFrequencyFiles, segmentFiles, outDir, ploidyBedPath, referenceFolder, sampleNames);
    }

    if (!File.Exists(pedigreeFile))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {pedigreeFile} does not exist! Exiting.");
        return 1;
    }

    return caller.CallVariantsInPedigree(variantFrequencyFiles, segmentFiles, outDir, ploidyBedPath, referenceFolder, sampleNames, pedigreeFile);
}
/// <summary>
/// Parses the command line, validates input files and thresholds, wires up the caller with its
/// writers and runs CNV calling, writing the result to CNV.vcf.gz in the output directory.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 after printing the help text (explicit help request, unrecognised argument or missing
/// required option), 1 when a required input file does not exist, otherwise the value returned
/// by CallVariants (-1 if the work-doer callback never assigned a result).
/// </returns>
/// <exception cref="IlluminaException">
/// A quality or de novo quality threshold is negative or not below the configured MaxQscore.
/// </exception>
private static int Run(string[] args)
{
    Utilities.LogCommandLine(args);

    string outDir = null;
    var segmentFiles = new List<string>();
    var variantFrequencyFiles = new List<string>();
    var sampleTypesString = new List<string>();
    string ploidyBedPath = null;
    string referenceFolder = null;
    var sampleNames = new List<string>();
    bool needHelp = false;
    int? qScoreThresholdOption = null;
    int? dqScoreThresholdOption = null;
    string commonCnvsBedPath = null;
    string parameterconfigPath = Path.Combine(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(Program)), "PedigreeCallerParameters.json");

    var p = new OptionSet()
    {
        { "i|infile=", "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)", v => segmentFiles.Add(v) },
        { "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)", v => variantFrequencyFiles.Add(v) },
        { "t|sampleType=", "sample types", v => sampleTypesString.Add(v) },
        { "o|outdir=", "name of output directory", v => outDir = v },
        { "r|reference=", "reference genome folder that contains GenomeSize.xml", v => referenceFolder = v },
        { "n|sampleName=", "sample name for output VCF header (optional)", v => sampleNames.Add(v) },
        { "p|ploidyBed=", "bed file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyBedPath = v },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        { "q|qscore=", $"quality filter threshold (default {CanvasPedigreeCaller.DefaultQualityFilterThreshold})", v => qScoreThresholdOption = int.Parse(v) },
        { "commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCnvsBedPath = v },
        { "d|dqscore=", $"de novo quality filter threshold (default {CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold})", v => dqScoreThresholdOption = int.Parse(v) },
        { "c|config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v }
    };

    var extraArgs = p.Parse(args);
    if (extraArgs.Count > 0)
    {
        Console.WriteLine("* Error: I don't understand the argument '{0}'", extraArgs[0]);
        needHelp = true;
    }

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    // segment files, variant frequency files, reference folder and output directory are mandatory
    if (!segmentFiles.Any() || !variantFrequencyFiles.Any() || string.IsNullOrEmpty(referenceFolder) || string.IsNullOrEmpty(outDir))
    {
        ShowHelp(p);
        return 0;
    }

    foreach (string segmentFile in segmentFiles)
    {
        if (File.Exists(segmentFile))
        {
            continue;
        }
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {segmentFile} does not exist! Exiting.");
        return 1;
    }

    foreach (string variantFrequencyFile in variantFrequencyFiles)
    {
        if (File.Exists(variantFrequencyFile))
        {
            continue;
        }
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {variantFrequencyFile} does not exist! Exiting.");
        return 1;
    }

    var sampleTypesEnum = sampleTypesString.Select(GetSampleType).ToList();

    if (!File.Exists(Path.Combine(referenceFolder, "GenomeSize.xml")))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {Path.Combine(referenceFolder, "GenomeSize.xml")} does not exist! Exiting.");
        return 1;
    }

    if (!File.Exists(parameterconfigPath))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {parameterconfigPath} does not exist! Exiting.");
        return 1;
    }

    // the common CNV bed file is optional, but must exist when it is specified
    if (commonCnvsBedPath != null && !File.Exists(commonCnvsBedPath))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {commonCnvsBedPath} does not exist! Exiting.");
        return 1;
    }

    var parameterconfigFile = new FileLocation(parameterconfigPath);
    var callerParameters = Deserialize<PedigreeCallerParameters>(parameterconfigFile);

    int qScoreThreshold = CanvasPedigreeCaller.DefaultQualityFilterThreshold;
    if (qScoreThresholdOption != null)
    {
        qScoreThreshold = qScoreThresholdOption.Value;
        Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied quality score threshold {qScoreThresholdOption}.");
    }
    if (qScoreThreshold < 0 || qScoreThreshold >= callerParameters.MaxQscore)
    {
        throw new IlluminaException($"Quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
    }

    int dqScoreThreshold = CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold;
    if (dqScoreThresholdOption != null)
    {
        dqScoreThreshold = dqScoreThresholdOption.Value;
        // report the de novo threshold itself (the message used to print the quality score option)
        Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied de novo quality score threshold {dqScoreThresholdOption}.");
    }
    if (dqScoreThreshold < 0 || dqScoreThreshold >= callerParameters.MaxQscore)
    {
        throw new IlluminaException($"De novo quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
    }

    var logger = new Logger(new[] { Console.Out }, new[] { Console.Error });
    var settings = IsasConfigurationSettings.GetConfigSettings();
    var outputDirectory = new DirectoryLocation(outDir);
    var workerDirectory = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPedigreeCaller)));
    var commandManager = new CommandManager(new ExecutableProcessor(settings, logger, workerDirectory));

    // stays -1 unless the callback below runs to the point of assigning the caller's return value
    var result = -1;
    var pedigreeCallerWorkDirectory = outputDirectory.GetDirectoryLocation("CanvasPedigreeCaller");
    WorkDoerFactory.RunWithWorkDoer(logger, settings, pedigreeCallerWorkDirectory, workDoer =>
    {
        var copyNumberLikelihoodCalculator = new CopyNumberLikelihoodCalculator(callerParameters.MaximumCopyNumber);

        // the configured default caller type selects between the two IVariantCaller implementations
        var variantCaller = callerParameters.DefaultCaller == CallerType.VariantCaller
            ? (IVariantCaller) new VariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold)
            : new HaplotypeVariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold);

        var copyNumberModelFactory = new HaplotypeCopyNumberModelFactory();
        var referenceGenome = new ReferenceGenomeFactory().GetReferenceGenome(new DirectoryLocation(referenceFolder));
        var genomeMetadata = referenceGenome.GenomeMetadata;

        var coverageBigWigWriterFactory = new CoverageVisualizationWriterFactory(logger, workDoer, commandManager, genomeMetadata);
        var roundingBedGraphWriter = new RoundingBedGraphWriter(new BedGraphWriterFacade(), 4);
        var coverageBigWigWriter = coverageBigWigWriterFactory.CreateBinCoverageBigWigWriter(roundingBedGraphWriter);
        var segmentCoverageBedGraphWriter = coverageBigWigWriterFactory.CreateSegmentBedGraphWriter(roundingBedGraphWriter);

        var tabixWrapper = TabixWrapperFactory.GetTabixWrapper(logger, workDoer, commandManager);
        var bgzfBedGraphWriter = new BgzfBedGraphWriter(new BedGraphWriterFacade(), tabixWrapper);
        var copyNumberBedGraphWriter = new CopyNumberBedGraphWriter(bgzfBedGraphWriter, new CopyNumberBedGraphCalculator());

        var caller = new CanvasPedigreeCaller(logger, qScoreThreshold, dqScoreThreshold, callerParameters,
            copyNumberLikelihoodCalculator, variantCaller, coverageBigWigWriter, copyNumberModelFactory,
            copyNumberBedGraphWriter, segmentCoverageBedGraphWriter);

        var outVcf = outputDirectory.GetFileLocation("CNV.vcf.gz");
        result = caller.CallVariants(variantFrequencyFiles, segmentFiles, outVcf, ploidyBedPath, referenceFolder,
            sampleNames, commonCnvsBedPath, sampleTypesEnum);
    });

    return result;
}