/// <summary>
/// Evaluates CNV calls against a truth set: loads both inputs, drops entries shorter than
/// <c>options.MinEntrySize</c>, optionally loads exclude intervals from a BED file, and then
/// runs the accuracy and callability computations.
/// </summary>
/// <param name="truthSetPath">Path passed to <c>LoadKnownCn</c> to load the known copy numbers.</param>
/// <param name="cnvCallsPath">Path passed to <c>GetCnvCallsFromVcf</c>; calls are treated as VCF when the file name contains "vcf" (case-insensitive).</param>
/// <param name="excludedBed">Optional BED file of intervals to exclude; ignored when null or empty.</param>
/// <param name="outputPath">Output location handed to the evaluator and used as the work directory.</param>
/// <param name="options">Evaluation options (size filter, DQ score threshold, ploidy information, ...).</param>
/// <exception cref="ArgumentException">
/// Thrown when a DQ score threshold is set on the checker but the calls file name does not contain "vcf".
/// </exception>
public static void Evaluate(string truthSetPath, string cnvCallsPath, string excludedBed, string outputPath,
    EvaluateCnvOptions options)
{
    double heterogeneityFraction = options.HeterogeneityFraction;
    var knownCn = LoadKnownCn(truthSetPath, heterogeneityFraction);
    knownCn = knownCn.SelectValues(
        truthEntries => truthEntries.Where(truthEntry => truthEntry.Length >= options.MinEntrySize).ToList());
    var calls = GetCnvCallsFromVcf(cnvCallsPath, options.DQscoreThreshold);
    calls = calls.SelectValues(
        chromosomeCalls => chromosomeCalls.Where(call => call.Length >= options.MinEntrySize).ToList());

    var excludeIntervals = new Dictionary<string, List<CNInterval>>();
    if (!string.IsNullOrEmpty(excludedBed))
    {
        var excludeIntervalsTmp = LoadIntervalsFromBed(excludedBed, false, 1.0);
        List<string> keys = excludeIntervalsTmp.Keys.ToList();
        foreach (string key in keys)
        {
            // Try the BED chromosome name as-is, then without "chr", then with a "chr" prefix,
            // so that BED and call files using different naming styles still line up.
            string chr = key;
            if (!calls.ContainsKey(chr))
            {
                chr = key.Replace("chr", "");
            }
            if (!calls.ContainsKey(chr))
            {
                chr = "chr" + key;
            }
            if (!calls.ContainsKey(chr))
            {
                // NOTE(review): the lookup above is against the calls, although the message mentions truth data.
                // The two sentences are now separated by a space (previously printed "data.Check").
                Console.WriteLine($"Error: Skipping exclude intervals for chromosome {key} with no truth data. "
                                  + $"Check that chromosome names are spelled correctly for exclude intervals");
                continue;
            }
            excludeIntervals[chr] = excludeIntervalsTmp[key];
        }
    }

    Console.WriteLine("TruthSet\t{0}", truthSetPath);
    Console.WriteLine("CNVCalls\t{0}", cnvCallsPath);

    // The file is considered a VCF when its name contains "vcf"; only then is a PASS-only evaluation run first.
    bool includePassingOnly = Path.GetFileName(cnvCallsPath).ToLower().Contains("vcf");

    var logger = new Logger(new[] { Console.Out }, new[] { Console.Error });
    var settings = IsasConfigurationSettings.GetConfigSettings();
    var output = new DirectoryLocation(outputPath);
    var workerDirectory = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CNVChecker)));
    var commandManager = new CommandManager(new ExecutableProcessor(settings, logger, workerDirectory));

    WorkDoerFactory.RunWithWorkDoer(logger, settings, output, workDoer =>
    {
        var tabixWrapper = TabixWrapperFactory.GetTabixWrapper(logger, workDoer, commandManager);
        var ploidyCorrector = new PloidyCorrector(logger, workDoer,
            new PloidyEstimator(logger, workDoer, null, false, commandManager), tabixWrapper, false);
        var checker = new CNVChecker(options.DQscoreThreshold, excludeIntervals, ploidyCorrector);

        if (options.PloidyInfo.SexPloidyInfo != null)
        {
            Console.WriteLine($">>>Getting reference ploidy from provided ploidy information and PAR bed file '{options.PloidyInfo.ParBed}'");
            var ploidy = checker.GetPloidy(options.PloidyInfo, output);
            var referencePloidy = LoadReferencePloidy(options.PloidyInfo.SexPloidyInfo, options.PloidyInfo.ParBed);
            knownCn = GetKnownCopyNumberWithReferencePloidy(referencePloidy, knownCn);
            calls = GetCallsWithRefPloidy(calls, ploidy);
        }

        var cnvEvaluator = new CnvEvaluator(checker);

        // includePassingOnly is exactly the "file name contains vcf" test, so it is reused here.
        if (checker.DQscoreThreshold.HasValue && !includePassingOnly)
        {
            throw new ArgumentException("CNV.vcf must be in a vcf format when --dqscore option is used");
        }

        cnvEvaluator.ComputeAccuracy(knownCn, cnvCallsPath, outputPath, includePassingOnly, options, calls);
        if (includePassingOnly)
        {
            // For VCF input, run a second evaluation without the PASS-only restriction.
            cnvEvaluator.ComputeAccuracy(knownCn, cnvCallsPath, outputPath, false, options, calls);
        }

        ComputeCallability(logger, calls, options, output);
        Console.WriteLine(">>>Done - results written to {0}", outputPath);
    });
}
/// <summary>
/// Command-line entry point logic for CanvasPedigreeCaller: parses the arguments, validates the
/// input files and quality thresholds, wires up the caller and its writers, and runs variant calling.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 when help is shown (including when required arguments are missing or unknown arguments are given),
/// 1 when a required input file does not exist, otherwise the value returned by
/// <c>CanvasPedigreeCaller.CallVariants</c>.
/// </returns>
/// <exception cref="IlluminaException">
/// Thrown when the quality or de novo quality score threshold is negative or not below
/// <c>callerParameters.MaxQscore</c>.
/// </exception>
private static int Run(string[] args)
{
    Utilities.LogCommandLine(args);

    string outDir = null;
    var segmentFiles = new List<string>();
    var variantFrequencyFiles = new List<string>();
    var sampleTypesString = new List<string>();
    string ploidyBedPath = null;
    string referenceFolder = null;
    var sampleNames = new List<string>();
    bool needHelp = false;
    int? qScoreThresholdOption = null;
    int? dqScoreThresholdOption = null;
    string commonCnvsBedPath = null;
    string parameterconfigPath = Path.Combine(
        Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(Program)), "PedigreeCallerParameters.json");

    var p = new OptionSet()
    {
        { "i|infile=", "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)", v => segmentFiles.Add(v) },
        { "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)", v => variantFrequencyFiles.Add(v) },
        { "t|sampleType=", "sample types", v => sampleTypesString.Add(v) },
        { "o|outdir=", "name of output directory", v => outDir = v },
        { "r|reference=", "reference genome folder that contains GenomeSize.xml", v => referenceFolder = v },
        { "n|sampleName=", "sample name for output VCF header (optional)", v => sampleNames.Add(v) },
        { "p|ploidyBed=", "bed file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyBedPath = v },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        { "q|qscore=", $"quality filter threshold (default {CanvasPedigreeCaller.DefaultQualityFilterThreshold})", v => qScoreThresholdOption = int.Parse(v) },
        { "commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCnvsBedPath = v },
        { "d|dqscore=", $"de novo quality filter threshold (default {CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold})", v => dqScoreThresholdOption = int.Parse(v) },
        { "c|config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v }
    };

    var extraArgs = p.Parse(args);
    if (extraArgs.Count > 0)
    {
        Console.WriteLine("* Error: I don't understand the argument '{0}'", extraArgs[0]);
        needHelp = true;
    }

    if (needHelp)
    {
        ShowHelp(p);
        return (0);
    }

    // NOTE(review): missing required arguments also exit with 0 after printing help.
    if (!segmentFiles.Any() || !variantFrequencyFiles.Any() || string.IsNullOrEmpty(referenceFolder) || string.IsNullOrEmpty(outDir))
    {
        ShowHelp(p);
        return (0);
    }

    foreach (string segmentFile in segmentFiles)
    {
        if (File.Exists(segmentFile))
        {
            continue;
        }
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {segmentFile} does not exist! Exiting.");
        return (1);
    }

    foreach (string variantFrequencyFile in variantFrequencyFiles)
    {
        if (File.Exists(variantFrequencyFile))
        {
            continue;
        }
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {variantFrequencyFile} does not exist! Exiting.");
        return (1);
    }

    var sampleTypesEnum = sampleTypesString.Select(GetSampleType).ToList();

    if (!File.Exists(Path.Combine(referenceFolder, "GenomeSize.xml")))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {Path.Combine(referenceFolder, "GenomeSize.xml")} does not exist! Exiting.");
        return (1);
    }

    if (!File.Exists(parameterconfigPath))
    {
        Console.WriteLine($"CanvasPedigreeCaller.exe: File {parameterconfigPath} does not exist! Exiting.");
        return (1);
    }

    if (commonCnvsBedPath != null)
    {
        if (!File.Exists(commonCnvsBedPath))
        {
            Console.WriteLine($"CanvasPedigreeCaller.exe: File {commonCnvsBedPath} does not exist! Exiting.");
            return (1);
        }
    }

    var parameterconfigFile = new FileLocation(parameterconfigPath);
    var callerParameters = Deserialize<PedigreeCallerParameters>(parameterconfigFile);

    int qScoreThreshold = CanvasPedigreeCaller.DefaultQualityFilterThreshold;
    if (qScoreThresholdOption != null)
    {
        qScoreThreshold = qScoreThresholdOption.Value;
        Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied quality score threshold {qScoreThresholdOption}.");
    }
    if (qScoreThreshold < 0 || qScoreThreshold >= callerParameters.MaxQscore)
    {
        throw new IlluminaException($"Quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
    }

    int dqScoreThreshold = CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold;
    if (dqScoreThresholdOption != null)
    {
        dqScoreThreshold = dqScoreThresholdOption.Value;
        // Report the de novo option itself; the regular q-score option was printed here by mistake.
        Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied de novo quality score threshold {dqScoreThresholdOption}.");
    }
    if (dqScoreThreshold < 0 || dqScoreThreshold >= callerParameters.MaxQscore)
    {
        throw new IlluminaException($"De novo quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
    }

    var logger = new Logger(new[] { Console.Out }, new[] { Console.Error });
    var settings = IsasConfigurationSettings.GetConfigSettings();
    var outputDirectory = new DirectoryLocation(outDir);
    var workerDirectory = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPedigreeCaller)));
    var commandManager = new CommandManager(new ExecutableProcessor(settings, logger, workerDirectory));

    // Stays -1 unless the work-doer callback below assigns the caller's return value.
    var result = -1;
    var pedigreeCallerWorkDirectory = outputDirectory.GetDirectoryLocation("CanvasPedigreeCaller");
    WorkDoerFactory.RunWithWorkDoer(logger, settings, pedigreeCallerWorkDirectory, workDoer =>
    {
        var copyNumberLikelihoodCalculator = new CopyNumberLikelihoodCalculator(callerParameters.MaximumCopyNumber);
        var variantCaller = callerParameters.DefaultCaller == CallerType.VariantCaller
            ? (IVariantCaller) new VariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold)
            : new HaplotypeVariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold);
        var copyNumberModelFactory = new HaplotypeCopyNumberModelFactory();

        var referenceGenome = new ReferenceGenomeFactory().GetReferenceGenome(new DirectoryLocation(referenceFolder));
        var genomeMetadata = referenceGenome.GenomeMetadata;

        var coverageBigWigWriterFactory = new CoverageVisualizationWriterFactory(logger, workDoer, commandManager, genomeMetadata);
        var roundingBedGraphWriter = new RoundingBedGraphWriter(new BedGraphWriterFacade(), 4);
        var coverageBigWigWriter = coverageBigWigWriterFactory.CreateBinCoverageBigWigWriter(roundingBedGraphWriter);
        var segmentCoverageBedGraphWriter = coverageBigWigWriterFactory.CreateSegmentBedGraphWriter(roundingBedGraphWriter);

        var tabixWrapper = TabixWrapperFactory.GetTabixWrapper(logger, workDoer, commandManager);
        var bgzfBedGraphWriter = new BgzfBedGraphWriter(new BedGraphWriterFacade(), tabixWrapper);
        var copyNumberBedGraphWriter = new CopyNumberBedGraphWriter(bgzfBedGraphWriter, new CopyNumberBedGraphCalculator());

        var caller = new CanvasPedigreeCaller(logger, qScoreThreshold, dqScoreThreshold, callerParameters,
            copyNumberLikelihoodCalculator, variantCaller, coverageBigWigWriter, copyNumberModelFactory,
            copyNumberBedGraphWriter, segmentCoverageBedGraphWriter);

        var outVcf = outputDirectory.GetFileLocation("CNV.vcf.gz");
        result = caller.CallVariants(variantFrequencyFiles, segmentFiles, outVcf, ploidyBedPath, referenceFolder,
            sampleNames, commonCnvsBedPath, sampleTypesEnum);
    });

    return (result);
}