/// <summary>
/// Writes a Nextera manifest to the file location that the sandbox directory
/// hands out for the manifest's name.
/// </summary>
/// <param name="manifest">Manifest to write; its <c>Name</c> selects the target file.</param>
/// <param name="sandbox">Directory that provides the target file location.</param>
/// <returns>The file location the manifest was written to.</returns>
private IFileLocation WriteManifest(NexteraManifest manifest, IDirectoryLocation sandbox)
{
    var manifestLocation = sandbox.GetFileLocation(manifest.Name);

    NexteraManifestUtils.WriteNexteraManifests(manifest, manifestLocation.FullName);

    return manifestLocation;
}
/// <summary>
/// Builds the CanvasClean command line for a callset and runs it as a single job
/// through the work manager.
/// </summary>
/// <param name="callset">Callset supplying the temp folder, id and, for enrichment, the manifest.</param>
/// <param name="binnedPath">Binned input handed to CanvasClean via <c>-i</c>.</param>
/// <returns>
/// Output descriptor built from the cleaned file path and the filter-regions file path.
/// </returns>
private CanvasCleanOutput InvokeCanvasClean(CanvasCallset callset, IFileLocation binnedPath)
{
    // TruSight Cancer has 1,737 targeted regions. The cut-off 2000 is somewhat arbitrary.
    // TruSight One has 62,309 targeted regions.
    // Nextera Rapid Capture v1.1 has 411,513 targeted regions.
    const int smallPanelRegionLimit = 2000;

    var arguments = new StringBuilder();
    string launcher = Path.Combine(_canvasFolder, "CanvasClean.exe");
    if (CrossPlatform.IsThisMono())
    {
        // Under Mono the assembly becomes the first argument and the Mono runtime is launched instead.
        arguments.AppendFormat("{0} ", launcher);
        launcher = Utilities.GetMonoPath();
    }

    arguments.AppendFormat("-i \"{0}\" ", binnedPath);

    string cleanedPath = Path.Combine(callset.TempFolder, string.Format("{0}.cleaned", callset.Id));
    arguments.AppendFormat("-o \"{0}\" ", cleanedPath);
    arguments.Append("-g");

    // The "-s -r -f" options are skipped only for enrichment panels at or below the region limit.
    string filterRegionsPath = null;
    bool isSmallEnrichmentPanel = callset.IsEnrichment && callset.Manifest.Regions.Count <= smallPanelRegionLimit;
    if (!isSmallEnrichmentPanel)
    {
        filterRegionsPath = Path.Combine(callset.TempFolder, "FilterRegions.txt");
        arguments.AppendFormat(" -s -r -f \"{0}\"", filterRegionsPath);
    }

    if (callset.IsEnrichment) // manifest
    {
        // Write the manifest to disk if it is not there yet, then pass its path along.
        bool manifestOnDisk = File.Exists(callset.TempManifestPath);
        if (!manifestOnDisk)
        {
            NexteraManifestUtils.WriteNexteraManifests(callset.Manifest, callset.TempManifestPath);
        }
        arguments.AppendFormat(" -t \"{0}\"", callset.TempManifestPath);
    }

    var cleanJob = new UnitOfWork
    {
        ExecutablePath = launcher,
        LoggingFolder = _workManager.LoggingFolder.FullName,
        LoggingStub = Path.GetFileName(cleanedPath),
        CommandLine = arguments.ToString()
    };

    // User-supplied CanvasClean options are merged into the generated command line.
    if (_customParameters.ContainsKey("CanvasClean"))
    {
        cleanJob.CommandLine = Utilities.MergeCommandLineOptions(cleanJob.CommandLine, _customParameters["CanvasClean"], true);
    }

    _workManager.DoWorkSingleThread(cleanJob);

    // NOTE(review): filterRegionsPath is still null here for small enrichment panels;
    // confirm that FileLocation / CanvasCleanOutput tolerate a null path.
    return new CanvasCleanOutput(new FileLocation(cleanedPath), new FileLocation(filterRegionsPath));
}
/// <summary>
/// Invoke CanvasNormalize: assembles its command line from the tumor and normal binned
/// files and runs it as a single job through the work manager.
/// </summary>
/// <param name="callset">Callset supplying the temp folder, id, normal BAMs and, for enrichment, the manifest.</param>
/// <param name="tumorBinnedPath">Binned tumor file, passed via <c>-t</c>.</param>
/// <param name="bamToBinned">
/// Lookup from a BAM's full path to its binned file; consulted for every normal BAM
/// when no Canvas control is used.
/// </param>
/// <param name="ploidyBedPath">Ploidy BED passed via <c>-p</c>; omitted when null or empty.</param>
/// <param name="mode">Value passed via <c>-m</c>.</param>
/// <returns>path to the bin ratio bed file</returns>
protected string InvokeCanvasNormalize(CanvasCallset callset, string tumorBinnedPath, Dictionary<string, string> bamToBinned, string ploidyBedPath, string mode = "weightedaverage")
{
    string ratioBinnedPath = Path.Combine(callset.TempFolder, string.Format("{0}.ratio.binned", callset.Id));
    string normalizerAssembly = Path.Combine(_canvasFolder, "CanvasNormalize.exe");

    // Under Mono the runtime is the executable and the assembly leads the argument list.
    bool onMono = CrossPlatform.IsThisMono();
    string launcher = onMono ? Utilities.GetMonoPath() : normalizerAssembly;

    var arguments = new StringBuilder();
    if (onMono)
    {
        arguments.AppendFormat("{0} ", normalizerAssembly);
    }

    // tumor bed
    arguments.AppendFormat("-t {0} ", tumorBinnedPath.WrapWithShellQuote());

    // normal bed(s): either the manifest's Canvas control or one entry per normal BAM
    bool useCanvasControl = callset.IsEnrichment && callset.Manifest.CanvasControlAvailable;
    if (useCanvasControl)
    {
        arguments.AppendFormat("-n {0} ", callset.Manifest.CanvasControlBinnedPath.WrapWithShellQuote());
    }
    else
    {
        foreach (var normalBam in callset.NormalBamPaths)
        {
            string normalBinnedPath = bamToBinned[normalBam.BamFile.FullName];
            arguments.AppendFormat("-n {0} ", normalBinnedPath.WrapWithShellQuote());
        }
    }

    // weighted average normal bed
    arguments.AppendFormat("-w {0} ", callset.NormalBinnedPath.WrapWithShellQuote());
    // ratio bed
    arguments.AppendFormat("-o {0} ", ratioBinnedPath.WrapWithShellQuote());

    if (callset.IsEnrichment) // manifest
    {
        // Write the manifest to disk if it is not there yet, then pass its path along.
        bool manifestOnDisk = File.Exists(callset.TempManifestPath);
        if (!manifestOnDisk)
        {
            NexteraManifestUtils.WriteNexteraManifests(callset.Manifest, callset.TempManifestPath);
        }
        arguments.AppendFormat("-f {0} ", callset.TempManifestPath.WrapWithShellQuote());
    }

    arguments.AppendFormat("-m {0} ", mode.WrapWithShellQuote());

    bool hasPloidyBed = !string.IsNullOrEmpty(ploidyBedPath);
    if (hasPloidyBed)
    {
        arguments.AppendFormat("-p {0} ", ploidyBedPath.WrapWithShellQuote());
    }

    var normalizeJob = new UnitOfWork
    {
        ExecutablePath = launcher,
        LoggingFolder = _workManager.LoggingFolder.FullName,
        LoggingStub = Path.GetFileName(ratioBinnedPath),
        CommandLine = arguments.ToString()
    };

    // User-supplied CanvasNormalize options are merged into the generated command line.
    if (_customParameters.ContainsKey("CanvasNormalize"))
    {
        normalizeJob.CommandLine = Utilities.MergeCommandLineOptions(normalizeJob.CommandLine, _customParameters["CanvasNormalize"], true);
    }

    _workManager.DoWorkSingleThread(normalizeJob);

    return ratioBinnedPath;
}
/// <summary>
/// Runs CanvasBin in bin-size-only mode (<c>-y</c>) for one BAM and reads the resulting
/// bin size from the first line of <c>callset.BinSizePath</c>.
/// </summary>
/// <param name="callset">Callset supplying the bin size path and, for enrichment, the manifest.</param>
/// <param name="bamPath">BAM passed to CanvasBin via <c>-b</c>.</param>
/// <param name="intermediateDataPaths">Intermediate data files, each passed via <c>-i</c>.</param>
/// <param name="canvasReferencePath">Reference file passed via <c>-r</c>.</param>
/// <param name="canvasBedPath">BED file passed via <c>-f</c>.</param>
/// <returns>The bin size parsed from the bin size file.</returns>
/// <exception cref="FormatException">
/// The bin size file is empty or its first line is not a valid 32-bit integer.
/// </exception>
private int GetBinSize(CanvasCallset callset, string bamPath, List<string> intermediateDataPaths, string canvasReferencePath, string canvasBedPath)
{
    string canvasBinPath = Path.Combine(_canvasFolder, "CanvasBin.exe");
    string executablePath = canvasBinPath;
    StringBuilder commandLine = new StringBuilder();
    if (CrossPlatform.IsThisMono())
    {
        // Under Mono the runtime is the executable and the assembly leads the argument list.
        executablePath = Utilities.GetMonoPath();
        commandLine.AppendFormat("{0} ", canvasBinPath);
    }

    commandLine.AppendFormat("-b \"{0}\" ", bamPath);
    commandLine.Append("-p "); // Paired-end input mode (Isaac or BWA output)
    commandLine.AppendFormat("-r \"{0}\" ", canvasReferencePath);
    foreach (string path in intermediateDataPaths)
    {
        commandLine.AppendFormat("-i \"{0}\" ", path);
    }
    commandLine.Append("-y "); // bin size only

    if (callset.IsEnrichment) // manifest
    {
        if (!File.Exists(callset.TempManifestPath))
        {
            NexteraManifestUtils.WriteNexteraManifests(callset.Manifest, callset.TempManifestPath);
        }
        commandLine.AppendFormat("-t \"{0}\" ", callset.TempManifestPath);
    }

    // The output stub is the bin size path with its extension removed.
    string outputStub = Path.Combine(Path.GetDirectoryName(callset.BinSizePath), Path.GetFileNameWithoutExtension(callset.BinSizePath));
    commandLine.AppendFormat("-f \"{0}\" -d {1} -o \"{2}\"", canvasBedPath, _countsPerBin, outputStub);

    UnitOfWork binJob = new UnitOfWork()
    {
        ExecutablePath = executablePath,
        LoggingFolder = _workManager.LoggingFolder.FullName,
        LoggingStub = Path.GetFileNameWithoutExtension(callset.BinSizePath),
        CommandLine = commandLine.ToString()
    };
    if (_customParameters.ContainsKey("CanvasBin"))
    {
        binJob.CommandLine = Utilities.MergeCommandLineOptions(binJob.CommandLine, _customParameters["CanvasBin"], true);
    }
    _workManager.DoWorkSingleThread(binJob);

    // Presumably the CanvasBin run above produced this file; only its first line is read.
    string firstLine;
    using (StreamReader reader = new StreamReader(callset.BinSizePath))
    {
        firstLine = reader.ReadLine();
    }

    // ReadLine returns null for an empty file. Parse with the invariant culture (the file is
    // machine-written) and fail with a message that names the offending file instead of an
    // opaque ArgumentNullException from int.Parse.
    int binSize;
    bool parsed = int.TryParse(
        firstLine,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out binSize);
    if (!parsed)
    {
        string found = firstLine == null ? "an empty file" : "'" + firstLine + "'";
        throw new FormatException(string.Format(
            "Could not read a bin size from '{0}': expected an integer on the first line but found {1}.",
            callset.BinSizePath,
            found));
    }

    return binSize;
}
/// <summary>
/// Invoke CanvasBin. Return null if this fails and we need to abort CNV calling for this sample.
/// </summary>
/// <remarks>
/// Work happens in three stages: one CanvasBin job per BAM and per autosome/allosome writes
/// intermediate data; one final CanvasBin job per BAM merges that data into a binned file;
/// and, when normal BAMs or a Canvas control are present, CanvasNormalize is run on the result.
/// The only path in this method that returns null is a callset without an input BAM.
/// </remarks>
/// <param name="callset">Callset describing the sample, its BAMs, genome and optional manifest.</param>
/// <param name="canvasReferencePath">Reference file passed to CanvasBin via <c>-r</c>.</param>
/// <param name="canvasBedPath">BED file passed to CanvasBin via <c>-f</c>.</param>
/// <param name="ploidyBedPath">Ploidy BED forwarded to CanvasNormalize.</param>
/// <returns>
/// The binned tumor file, or the CanvasNormalize output when normalization ran;
/// null when the callset has no input BAM.
/// </returns>
protected IFileLocation InvokeCanvasBin(CanvasCallset callset, string canvasReferencePath, string canvasBedPath, string ploidyBedPath)
{
    // Fail fast: use bam as input, so without one there is nothing to do.
    if (callset.Bam == null)
    {
        Console.WriteLine("Input bam file not seen for sample {0}_{1} - no CNV calls", callset.SampleName, callset.Id);
        return null;
    }

    string canvasBinPath = Path.Combine(_canvasFolder, "CanvasBin.exe");
    // Under Mono the runtime is the executable and the assembly leads the argument list.
    bool runningOnMono = CrossPlatform.IsThisMono();
    string executablePath = runningOnMono ? Utilities.GetMonoPath() : canvasBinPath;

    bool useCanvasControl = callset.IsEnrichment && callset.Manifest.CanvasControlAvailable;

    // The sample BAM always comes first; do not add normal BAMs if Canvas Control is available.
    List<string> bamPaths = new List<string>();
    bamPaths.Add(callset.Bam.BamFile.FullName);
    if (!useCanvasControl)
    {
        bamPaths.AddRange(callset.NormalBamPaths.Select(bam => bam.BamFile.FullName));
    }

    // Write the manifest once up front instead of probing the file system for every job.
    if (callset.IsEnrichment && !File.Exists(callset.TempManifestPath))
    {
        NexteraManifestUtils.WriteNexteraManifests(callset.Manifest, callset.TempManifestPath);
    }

    // Only invoke CanvasBin for autosomes + allosomes;
    // don't invoke it for mitochondrial chromosome or extra contigs or decoys.
    // The list is sorted (longest first) and filtered once rather than once per BAM.
    List<GenomeMetadata.SequenceMetadata> sequencesToBin = new List<GenomeMetadata.SequenceMetadata>();
    foreach (GenomeMetadata.SequenceMetadata sequenceMetadata in callset.GenomeMetadata.Sequences.OrderByDescending(sequence => sequence.Length))
    {
        if (sequenceMetadata.Type == GenomeMetadata.SequenceType.Allosome || sequenceMetadata.IsAutosome())
        {
            sequencesToBin.Add(sequenceMetadata);
        }
    }

    Dictionary<string, List<string>> intermediateDataPathsByBamPath = new Dictionary<string, List<string>>();
    foreach (string bamPath in bamPaths)
    {
        intermediateDataPathsByBamPath[bamPath] = new List<string>();
    }

    // Stage 1: one intermediate-data job per BAM and per selected reference sequence.
    StringBuilder commandLine = new StringBuilder();
    List<UnitOfWork> binJobs = new List<UnitOfWork>();
    for (int bamIndex = 0; bamIndex < bamPaths.Count; bamIndex++)
    {
        string bamPath = bamPaths[bamIndex];
        foreach (GenomeMetadata.SequenceMetadata sequenceMetadata in sequencesToBin)
        {
            commandLine.Clear();
            if (runningOnMono)
            {
                commandLine.AppendFormat("{0} ", canvasBinPath);
            }
            commandLine.AppendFormat("-b \"{0}\" ", bamPath);
            if (callset.Bam.IsPairedEnd)
            {
                commandLine.Append("-p ");
            }
            commandLine.AppendFormat("-r \"{0}\" ", canvasReferencePath);
            commandLine.AppendFormat("-c {0} ", sequenceMetadata.Name);
            commandLine.AppendFormat("-m {0} ", _coverageMode);
            if (callset.IsEnrichment) // manifest
            {
                commandLine.AppendFormat("-t \"{0}\" ", callset.TempManifestPath);
            }

            string intermediateDataPath = Path.Combine(callset.TempFolder, string.Format("{0}_{1}_{2}.dat", callset.Id, bamIndex, sequenceMetadata.Name));
            intermediateDataPathsByBamPath[bamPath].Add(intermediateDataPath);
            commandLine.AppendFormat("-f \"{0}\" -d {1} -o \"{2}\" ", canvasBedPath, _countsPerBin, intermediateDataPath);

            UnitOfWork binJob = new UnitOfWork()
            {
                ExecutablePath = executablePath,
                LoggingFolder = _workManager.LoggingFolder.FullName,
                LoggingStub = Path.GetFileName(intermediateDataPath),
                CommandLine = commandLine.ToString()
            };
            if (_customParameters.ContainsKey("CanvasBin"))
            {
                binJob.CommandLine = Utilities.MergeCommandLineOptions(binJob.CommandLine, _customParameters["CanvasBin"], true);
            }
            binJobs.Add(binJob);
        }
    }
    _workManager.DoWorkParallelThreads(binJobs);

    // get bin size (of the smallest BAM) if normal BAMs are given;
    // -1 means "no explicit bin size", in which case -z is not passed below.
    int binSize = -1;
    if (bamPaths.Count > 1)
    {
        string smallestBamPath = SmallestFile(bamPaths);
        binSize = GetBinSize(callset, smallestBamPath, intermediateDataPathsByBamPath[smallestBamPath], canvasReferencePath, canvasBedPath);
    }
    else if (useCanvasControl)
    {
        binSize = callset.Manifest.CanvasBinSize.Value;
    }

    // Stage 2: finish up CanvasBin step by merging intermediate data and finally binning.
    Dictionary<string, string> bamToBinned = new Dictionary<string, string>();
    List<UnitOfWork> finalBinJobs = new List<UnitOfWork>();
    for (int bamIdx = 0; bamIdx < bamPaths.Count; bamIdx++)
    {
        string bamPath = bamPaths[bamIdx];
        string binnedPath = Path.Combine(callset.TempFolder, string.Format("{0}_{1}.binned", callset.Id, bamIdx));
        bamToBinned[bamPath] = binnedPath;

        commandLine.Clear();
        if (runningOnMono)
        {
            commandLine.AppendFormat("{0} ", canvasBinPath);
        }
        commandLine.AppendFormat("-b \"{0}\" ", bamPath);
        if (callset.Bam.IsPairedEnd)
        {
            commandLine.Append("-p ");
        }
        commandLine.AppendFormat("-r \"{0}\" ", canvasReferencePath);
        commandLine.AppendFormat("-f \"{0}\" -d {1} -o \"{2}\" ", canvasBedPath, _countsPerBin, binnedPath);
        if (binSize != -1)
        {
            commandLine.AppendFormat("-z \"{0}\" ", binSize);
        }
        foreach (string path in intermediateDataPathsByBamPath[bamPath])
        {
            commandLine.AppendFormat("-i \"{0}\" ", path);
        }
        commandLine.AppendFormat("-m {0} ", _coverageMode);

        UnitOfWork finalBinJob = new UnitOfWork()
        {
            ExecutablePath = executablePath,
            LoggingFolder = _workManager.LoggingFolder.FullName,
            LoggingStub = Path.GetFileName(binnedPath),
            CommandLine = commandLine.ToString()
        };
        if (_customParameters.ContainsKey("CanvasBin"))
        {
            finalBinJob.CommandLine = Utilities.MergeCommandLineOptions(finalBinJob.CommandLine, _customParameters["CanvasBin"], true);
        }
        finalBinJobs.Add(finalBinJob);
    }
    _workManager.DoWorkParallel(finalBinJobs, new TaskResourceRequirements(8, 25)); // CanvasBin itself is multi-threaded

    // Stage 3: normalize against the normals / Canvas control when there is something to normalize against.
    string tumorBinnedPath = bamToBinned[callset.Bam.BamFile.FullName]; // binned tumor sample
    string outputPath = tumorBinnedPath;
    if (callset.NormalBamPaths.Any() || useCanvasControl)
    {
        outputPath = InvokeCanvasNormalize(callset, tumorBinnedPath, bamToBinned, ploidyBedPath);
    }

    return new FileLocation(outputPath);
}