Пример #1
0
        /// <summary>
        /// Assigns a de-novo quality (DQ) score to the segment of every offspring that survives filtering.
        /// De-novo CNV calling is performed in two steps:
        /// 1. Filter REF variants and common CNVs; this step relies only on total CN calls, with the associated shortcomings.
        /// 2. Assign the de-novo quality based on the joint likelihood across the pedigree using marginalisation operations.
        /// </summary>
        /// <param name="canvasSegments">Per-sample segments; the proband's segment receives the DqScore</param>
        /// <param name="samplesInfo">Per-sample metrics forwarded to the REF / shared-CNV checks</param>
        /// <param name="parentIDs">Sample ids of the parents</param>
        /// <param name="offspringIDs">Sample ids of the offspring; each one is treated as the proband in turn</param>
        /// <param name="copyNumbersLikelihoods">Joint likelihoods used to compute the conditional de-novo quality</param>
        private void SetDenovoQualityScores(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo, List <SampleId> parentIDs, List <SampleId> offspringIDs,
                                            JointLikelihoods copyNumbersLikelihoods)
        {
            foreach (var probandId in offspringIDs)
            {
                // nothing to score when the targeted proband is REF
                bool probandIsReference = IsReferenceVariant(canvasSegments, samplesInfo, probandId);
                if (probandIsReference)
                {
                    continue;
                }

                // a CNV shared with the parents is a common variant, not a de-novo event
                bool sharedWithParents = CanvasPedigreeCaller.IsSharedCnv(canvasSegments, samplesInfo, parentIDs, probandId, _callerParameters.MaximumCopyNumber);
                if (sharedWithParents)
                {
                    continue;
                }

                // skip as soon as any other offspring carries an ALT call
                var siblingIds = offspringIDs.Except(probandId.ToEnumerable());
                if (siblingIds.Any(siblingId => !IsReferenceVariant(canvasSegments, samplesInfo, siblingId)))
                {
                    continue;
                }

                // every parent and the proband must be a PASS variant
                var parentsAndProband = parentIDs.Concat(probandId);
                if (!parentsAndProband.All(sampleId => IsPassVariant(canvasSegments, sampleId)))
                {
                    continue;
                }

                // doubling moves the de-novo quality score threshold to 20 (rather than 10) to match Manta
                double dqScore = CanvasPedigreeCaller.GetConditionalDeNovoQualityScore(canvasSegments, copyNumbersLikelihoods, samplesInfo, parentIDs, probandId) * 2;

                // infinite or too large scores are replaced by the configured maximum
                bool replaceWithMaximum = Double.IsInfinity(dqScore) || dqScore > _callerParameters.MaxQscore;
                canvasSegments[probandId].DqScore = replaceWithMaximum ? _callerParameters.MaxQscore : dqScore;
            }
        }
Пример #2
0
        /// <summary>
        /// Calls copy numbers for the given segments from single-sample likelihoods (pedigree and
        /// non-pedigree samples), estimates Q-scores and, when allele counts can be used, runs the
        /// MCC assignment for pedigree members and for the remaining ("other") samples.
        /// </summary>
        /// <param name="canvasSegments">Per-sample segments to call</param>
        /// <param name="samplesInfo">Per-sample metrics</param>
        /// <param name="copyNumberModel">Per-sample copy number models</param>
        /// <param name="pedigreeInfo">Pedigree description that decides which samples are called jointly</param>
        public void CallVariant(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                ISampleMap <ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
        {
            var likelihoodsPerSample = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo, copyNumberModel, _callerParameters.NumberOfTrimmedBins);

            // copy numbers of pedigree members come with joint likelihoods; all other samples are called separately
            var (copyNumbersInPedigree, jointPedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, likelihoodsPerSample);
            var copyNumbersOutsidePedigree = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, likelihoodsPerSample);
            var orderedCopyNumbers = copyNumbersInPedigree.Concat(copyNumbersOutsidePedigree).OrderBy(canvasSegments.SampleIds);

            EstimateQScores(canvasSegments, samplesInfo, pedigreeInfo, likelihoodsPerSample, jointPedigreeLikelihoods, orderedCopyNumbers);

            // TODO: this will be integrated with GetCopyNumbers* on a model level as a part of https://jira.illumina.com/browse/CANV-404
            bool assignMccInPedigree =
                CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments, _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment) &&
                pedigreeInfo.HasFullPedigree();
            if (assignMccInPedigree)
            {
                AssignMccWithPedigreeInfo(canvasSegments, copyNumberModel, pedigreeInfo);
            }

            // the allele-count check is deliberately repeated here, after the pedigree assignment, as in the original flow
            bool assignMccForOthers =
                CanvasPedigreeCaller.UseAlleleCountsInformation(canvasSegments, _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment) &&
                pedigreeInfo.HasOther();
            if (assignMccForOthers)
            {
                var otherSegments = canvasSegments.Where(segment => pedigreeInfo.OtherIds.Contains(segment.SampleId)).ToSampleMap();
                AssignMccNoPedigreeInfo(otherSegments, copyNumberModel, _genotypes);
            }
        }
Пример #3
0
        /// <summary>
        /// Calls copy numbers for the given segments. Coverage likelihoods are always computed; when the
        /// segment qualifies for allele-count usage they are joined with genotype log-likelihoods, otherwise
        /// they are only converted to log-likelihoods. The resulting single-sample likelihoods drive the
        /// pedigree and non-pedigree copy number calls and the final CN / score assignment.
        /// </summary>
        /// <param name="canvasSegments">Per-sample segments to call</param>
        /// <param name="samplesInfo">Per-sample metrics</param>
        /// <param name="copyNumberModel">Per-sample copy number models</param>
        /// <param name="pedigreeInfo">Pedigree description that decides which samples are called jointly</param>
        public void CallVariant(ISampleMap <CanvasSegment> canvasSegments, ISampleMap <SampleMetrics> samplesInfo,
                                ISampleMap <ICopyNumberModel> copyNumberModel, PedigreeInfo pedigreeInfo)
        {
            var likelihoodsFromCoverage = _copyNumberLikelihoodCalculator.GetCopyNumbersLikelihoods(canvasSegments, samplesInfo, copyNumberModel);

            // number of B-alleles is taken from the first sample's segment
            int bAlleleCount = canvasSegments.Values.First().Balleles.Size();

            // allele information is only used when number and properties of SNPs in the segment are above threshold
            bool alleleCountsUsable = CanvasPedigreeCaller.UseAlleleCountsInformation(
                canvasSegments, _callerParameters.MinAlleleCountsThreshold, _callerParameters.MinAlleleNumberInSegment);

            // either merge coverage and allele likelihoods (JoinLikelihoods) or fall back to coverage alone
            var likelihoodsPerSample = alleleCountsUsable
                ? JoinLikelihoods(GetGenotypeLogLikelihoods(canvasSegments, copyNumberModel, _PhasedGenotypes), likelihoodsFromCoverage, bAlleleCount)
                : ConvertToLogLikelihood(likelihoodsFromCoverage);

            // joint likelihood across pedigree samples, based on coverage only or coverage + allele counts
            var (copyNumbersInPedigree, jointPedigreeLikelihoods) = GetPedigreeCopyNumbers(pedigreeInfo, likelihoodsPerSample);
            var copyNumbersOutsidePedigree = CanvasPedigreeCaller.GetNonPedigreeCopyNumbers(canvasSegments, pedigreeInfo, likelihoodsPerSample);
            var orderedCopyNumbers = copyNumbersOutsidePedigree.Concat(copyNumbersInPedigree).OrderBy(canvasSegments.SampleIds);

            AssignCNandScores(canvasSegments, samplesInfo, pedigreeInfo, likelihoodsPerSample,
                              jointPedigreeLikelihoods, orderedCopyNumbers);
        }
Пример #4
0
        /// <summary>
        /// Command-line entry point: parses the options, checks that all required input files exist,
        /// loads the caller parameters and runs CNV calling with or without pedigree information.
        /// </summary>
        /// <param name="args">Command-line arguments</param>
        /// <returns>
        /// 0 when help / usage is shown, 1 when a required file is missing, otherwise the value
        /// returned by the caller's CallVariants / CallVariantsInPedigree.
        /// </returns>
        static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);
            string outDir                = null;
            var    segmentFiles          = new List <string>();
            var    variantFrequencyFiles = new List <string>();
            string ploidyBedPath         = null;
            string pedigreeFile          = null;
            string referenceFolder       = null;
            var    sampleNames           = new List <string>();
            bool   needHelp              = false;
            int?   qScoreThreshold       = null;
            int?   dqScoreThreshold      = null;
            string parameterconfigPath   = Path.Combine(Utilities.GetAssemblyFolder(typeof(Program)), "PedigreeCallerParameters.json");

            var caller = new CanvasPedigreeCaller();

            var p = new OptionSet()
            {
                { "i|infile=", "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)", v => segmentFiles.Add(v) },
                { "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)", v => variantFrequencyFiles.Add(v) },
                { "o|outdir=", "name of output directory", v => outDir = v },
                { "r|reference=", "reference genome folder that contains GenomeSize.xml", v => referenceFolder = v },
                { "n|sampleName=", "sample name for output VCF header (optional)", v => sampleNames.Add(v) },
                { "f|pedigree=", "relationship withoin pedigree (parents/proband)", v => pedigreeFile = v },
                { "p|ploidyBed=", "bed file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyBedPath = v },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                { "q|qscore=", $"quality filter threshold (default {caller.QualityFilterThreshold})", v => qScoreThreshold = int.Parse(v) },
                { "d|dqscore=", $"de novo quality filter threshold (default {caller.DeNovoQualityFilterThreshold})", v => dqScoreThreshold = int.Parse(v) },
                { "c|config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v }
            };

            var extraArgs = p.Parse(args);

            if (extraArgs.Count > 0)
            {
                Console.WriteLine("* Error: I don't understand the argument '{0}'", extraArgs[0]);
                needHelp = true;
            }

            // usage is shown both on request and when a mandatory option is missing
            bool missingRequiredOption = !segmentFiles.Any() || !variantFrequencyFiles.Any() ||
                                         string.IsNullOrEmpty(referenceFolder) || string.IsNullOrEmpty(outDir);
            if (needHelp || missingRequiredOption)
            {
                ShowHelp(p);
                return(0);
            }

            // segment files are checked before variant frequency files, so the first missing file is reported in that order
            foreach (string inputFile in segmentFiles.Concat(variantFrequencyFiles))
            {
                if (File.Exists(inputFile))
                {
                    continue;
                }
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {inputFile} does not exist! Exiting.");
                return(1);
            }

            string genomeSizePath = Path.Combine(referenceFolder, "GenomeSize.xml");
            if (!File.Exists(genomeSizePath))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {genomeSizePath} does not exist! Exiting.");
                return(1);
            }

            if (!File.Exists(parameterconfigPath))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {parameterconfigPath} does not exist! Exiting.");
                return(1);
            }

            var parameterconfigFile = new FileLocation(parameterconfigPath);

            caller.CallerParameters = Deserialize <PedigreeCallerParameters>(parameterconfigFile);

            // Apply the quality threshold before choosing the calling mode: previously it was set only
            // after the pedigree-less branch had already returned, so -q was silently ignored there.
            // Values outside (0, 60) are ignored and the caller's default is kept.
            if (qScoreThreshold.HasValue && qScoreThreshold.Value > 0 && qScoreThreshold.Value < 60)
            {
                caller.QualityFilterThreshold = qScoreThreshold.Value;
                Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied quality score threshold {qScoreThreshold}.");
            }

            if (pedigreeFile.IsNullOrEmpty())
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: pedigreeFile option is not used! Calling CNV variants without family information.");
                return(caller.CallVariants(variantFrequencyFiles, segmentFiles, outDir, ploidyBedPath, referenceFolder, sampleNames));
            }

            // the de novo threshold is only set for pedigree calling; same (0, 60) validity window as above
            if (dqScoreThreshold.HasValue && dqScoreThreshold.Value > 0 && dqScoreThreshold.Value < 60)
            {
                caller.DeNovoQualityFilterThreshold = dqScoreThreshold.Value;
                // report the de novo threshold itself (the message used to print the q-score threshold)
                Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied de novo quality score threshold {dqScoreThreshold}.");
            }

            if (!File.Exists(pedigreeFile))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {pedigreeFile} does not exist! Exiting.");
                return(1);
            }

            return(caller.CallVariantsInPedigree(variantFrequencyFiles, segmentFiles, outDir, ploidyBedPath, referenceFolder, sampleNames, pedigreeFile));
        }
Пример #5
0
        /// <summary>
        /// Parses the command line, validates input files and thresholds, wires up the pedigree caller
        /// with its writers and runs CNV calling, writing CNV.vcf.gz into the output directory.
        /// </summary>
        /// <param name="args">Command-line arguments</param>
        /// <returns>
        /// 0 when help / usage is shown, 1 when a required file is missing, otherwise the value returned
        /// by the caller's CallVariants (-1 if the work-doer callback never assigns a result).
        /// </returns>
        /// <exception cref="IlluminaException">
        /// Thrown when the quality or de novo quality threshold is negative or not below the configured MaxQscore.
        /// </exception>
        private static int Run(string[] args)
        {
            Utilities.LogCommandLine(args);
            string outDir                 = null;
            var    segmentFiles           = new List <string>();
            var    variantFrequencyFiles  = new List <string>();
            var    sampleTypesString      = new List <string>();
            string ploidyBedPath          = null;
            string referenceFolder        = null;
            var    sampleNames            = new List <string>();
            bool   needHelp               = false;
            int?   qScoreThresholdOption  = null;
            int?   dqScoreThresholdOption = null;
            string commonCnvsBedPath      = null;
            string parameterconfigPath    = Path.Combine(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(Program)), "PedigreeCallerParameters.json");

            var p = new OptionSet()
            {
                { "i|infile=", "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)", v => segmentFiles.Add(v) },
                { "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)", v => variantFrequencyFiles.Add(v) },
                { "t|sampleType=", "sample types", v => sampleTypesString.Add(v) },
                { "o|outdir=", "name of output directory", v => outDir = v },
                { "r|reference=", "reference genome folder that contains GenomeSize.xml", v => referenceFolder = v },
                { "n|sampleName=", "sample name for output VCF header (optional)", v => sampleNames.Add(v) },
                { "p|ploidyBed=", "bed file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyBedPath = v },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                { "q|qscore=", $"quality filter threshold (default {CanvasPedigreeCaller.DefaultQualityFilterThreshold})", v => qScoreThresholdOption = int.Parse(v) },
                { "commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCnvsBedPath = v },
                { "d|dqscore=", $"de novo quality filter threshold (default {CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold})", v => dqScoreThresholdOption = int.Parse(v) },
                { "c|config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v }
            };

            var extraArgs = p.Parse(args);

            if (extraArgs.Count > 0)
            {
                Console.WriteLine("* Error: I don't understand the argument '{0}'", extraArgs[0]);
                needHelp = true;
            }

            // usage is shown both on request and when a mandatory option is missing
            bool missingRequiredOption = !segmentFiles.Any() || !variantFrequencyFiles.Any() ||
                                         string.IsNullOrEmpty(referenceFolder) || string.IsNullOrEmpty(outDir);
            if (needHelp || missingRequiredOption)
            {
                ShowHelp(p);
                return(0);
            }

            // segment files are checked before variant frequency files, so the first missing file is reported in that order
            foreach (string inputFile in segmentFiles.Concat(variantFrequencyFiles))
            {
                if (File.Exists(inputFile))
                {
                    continue;
                }
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {inputFile} does not exist! Exiting.");
                return(1);
            }

            // sample types are converted after the input files are verified and before the reference is checked
            var sampleTypesEnum = sampleTypesString.Select(GetSampleType).ToList();

            string genomeSizePath = Path.Combine(referenceFolder, "GenomeSize.xml");
            if (!File.Exists(genomeSizePath))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {genomeSizePath} does not exist! Exiting.");
                return(1);
            }

            if (!File.Exists(parameterconfigPath))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {parameterconfigPath} does not exist! Exiting.");
                return(1);
            }

            // the common CNVs bed file is optional, but must exist when it is given
            if (commonCnvsBedPath != null && !File.Exists(commonCnvsBedPath))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {commonCnvsBedPath} does not exist! Exiting.");
                return(1);
            }

            var parameterconfigFile = new FileLocation(parameterconfigPath);
            var callerParameters    = Deserialize <PedigreeCallerParameters>(parameterconfigFile);

            int qScoreThreshold = CanvasPedigreeCaller.DefaultQualityFilterThreshold;

            if (qScoreThresholdOption != null)
            {
                qScoreThreshold = qScoreThresholdOption.Value;
                Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied quality score threshold {qScoreThresholdOption}.");
            }
            // the range check applies to the default value as well as to a user-supplied one
            if (qScoreThreshold < 0 || qScoreThreshold >= callerParameters.MaxQscore)
            {
                throw new IlluminaException($"Quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
            }

            int dqScoreThreshold = CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold;

            if (dqScoreThresholdOption != null)
            {
                dqScoreThreshold = dqScoreThresholdOption.Value;
                // report the de novo threshold itself (the message used to print the q-score option)
                Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied de novo quality score threshold {dqScoreThresholdOption}.");
            }
            if (dqScoreThreshold < 0 || dqScoreThreshold >= callerParameters.MaxQscore)
            {
                throw new IlluminaException($"De novo quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
            }

            var logger                      = new Logger(new[] { Console.Out }, new[] { Console.Error });
            var settings                    = IsasConfigurationSettings.GetConfigSettings();
            var outputDirectory             = new DirectoryLocation(outDir);
            var workerDirectory             = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPedigreeCaller)));
            var commandManager              = new CommandManager(new ExecutableProcessor(settings, logger, workerDirectory));
            // stays -1 unless the callback below runs and stores the caller's return value
            var result                      = -1;
            var pedigreeCallerWorkDirectory = outputDirectory.GetDirectoryLocation("CanvasPedigreeCaller");

            WorkDoerFactory.RunWithWorkDoer(logger, settings, pedigreeCallerWorkDirectory, workDoer =>
            {
                var copyNumberLikelihoodCalculator = new CopyNumberLikelihoodCalculator(callerParameters.MaximumCopyNumber);
                // the configured default caller selects between the two IVariantCaller implementations
                var variantCaller = callerParameters.DefaultCaller == CallerType.VariantCaller ?
                                    (IVariantCaller) new VariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold) :
                                    new HaplotypeVariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold);

                var copyNumberModelFactory = new HaplotypeCopyNumberModelFactory();
                var referenceGenome        = new ReferenceGenomeFactory().GetReferenceGenome(new DirectoryLocation(referenceFolder));
                var genomeMetadata         = referenceGenome.GenomeMetadata;

                var coverageBigWigWriterFactory =
                    new CoverageVisualizationWriterFactory(logger, workDoer, commandManager, genomeMetadata);
                var roundingBedGraphWriter        = new RoundingBedGraphWriter(new BedGraphWriterFacade(), 4);
                var coverageBigWigWriter          = coverageBigWigWriterFactory.CreateBinCoverageBigWigWriter(roundingBedGraphWriter);
                var segmentCoverageBedGraphWriter = coverageBigWigWriterFactory.CreateSegmentBedGraphWriter(roundingBedGraphWriter);

                var tabixWrapper             = TabixWrapperFactory.GetTabixWrapper(logger, workDoer, commandManager);
                var bgzfBedGraphWriter       = new BgzfBedGraphWriter(new BedGraphWriterFacade(), tabixWrapper);
                var copyNumberBedGraphWriter = new CopyNumberBedGraphWriter(bgzfBedGraphWriter, new CopyNumberBedGraphCalculator());

                var caller = new CanvasPedigreeCaller(logger, qScoreThreshold, dqScoreThreshold, callerParameters, copyNumberLikelihoodCalculator, variantCaller, coverageBigWigWriter, copyNumberModelFactory, copyNumberBedGraphWriter, segmentCoverageBedGraphWriter);

                var outVcf = outputDirectory.GetFileLocation("CNV.vcf.gz");
                result     = caller.CallVariants(variantFrequencyFiles, segmentFiles, outVcf, ploidyBedPath, referenceFolder, sampleNames, commonCnvsBedPath, sampleTypesEnum);
            });
            return(result);
        }