/// <summary>
/// Collects the reference names (and their reference indices) from the input BAM,
/// restricted to <c>_options.Chromosomes</c> when that is set, and passes them together
/// with the filtered command line to <c>GeminiMultiProcessor.Execute</c>.
/// </summary>
/// <remarks>
/// No exceptions are caught here; per the original note, the wrapper that invokes this
/// method is expected to handle all throwing and catching.
/// </remarks>
protected override void ProgramExecution()
{
    var optionsUsed = _appOptionParser.ParsingResult.OptionsUsed;

    // Option names handed to GetCommandLineWithoutIgnoredArguments as the arguments to leave out.
    var doNotPassToSubprocess = new List<string>()
    {
        "outFolder", "numProcesses", "exePath", "intermediateDir", "multiprocess", "chromosomes"
    };
    var cmdLineList = MultiProcessHelpers.GetCommandLineWithoutIgnoredArguments(optionsUsed, doNotPassToSubprocess);

    var refNameMapping = new Dictionary<string, int>();
    using (var bamReader = new BamReader(_options.InputBam))
    {
        var chroms = bamReader.GetReferenceNames();

        // Build the chromosome filter once instead of copying the requested chromosomes
        // into a new list for every reference name. A null filter means "keep everything".
        var requestedChromosomes = _options.Chromosomes;
        var chromosomeFilter = requestedChromosomes == null
            ? null
            : new HashSet<string>(requestedChromosomes);

        foreach (var referenceName in chroms)
        {
            if (chromosomeFilter != null && !chromosomeFilter.Contains(referenceName))
            {
                continue;
            }

            // Dictionary.Add throws on a duplicate key, so duplicate reference names still fail loudly.
            refNameMapping.Add(referenceName, bamReader.GetReferenceIndex(referenceName));
        }
    }

    var taskManager = new CliTaskManager(_options.NumProcesses);
    var geminiProcessor = new GeminiMultiProcessor(_options, new CliTaskCreator());
    var samtoolsWrapper = new SamtoolsWrapper(_options.GeminiOptions.SamtoolsPath, _options.GeminiOptions.IsWeirdSamtools);

    geminiProcessor.Execute(taskManager, refNameMapping, cmdLineList, samtoolsWrapper);
}
/// <summary>
/// Runs the Gemini workflow in-process for a single chromosome, using a folder named after
/// the chromosome underneath <paramref name="outMultiPath"/> as the output location.
/// </summary>
/// <param name="chromRefIds">Lookup from chromosome name to reference id; it is indexed with <paramref name="chrom"/>.</param>
/// <param name="outMultiPath">Parent folder in which the per-chromosome output folder is created.</param>
/// <param name="taskDirectories">List that receives the per-chromosome output folder after the workflow has executed.</param>
/// <param name="chrom">Name of the chromosome to process.</param>
/// <param name="options">
/// Multi-process options. Note that <c>StitcherOptions.NifyUnstitchablePairs</c> and
/// <c>StitcherOptions.FilterDuplicates</c> on this instance are overwritten by this method.
/// </param>
/// <returns>
/// The path <c>merged.bam.sorted.bam</c> inside the per-chromosome output folder. The path is
/// only composed here; this method does not check that the file exists.
/// </returns>
private static string ProcessChromosome(
    Dictionary<string, int> chromRefIds,
    string outMultiPath,
    List<string> taskDirectories,
    string chrom,
    GeminiMultiOptions options)
{
    // TODO either officially deprecate non-multiprocess-processing and remove this,
    // or consolidate this with the Gemini calling code from Gemini/Program.cs
    var chromOutputDir = Path.Combine(outMultiPath, chrom);
    var chromRefId = chromRefIds[chrom];

    // Only derive a per-chromosome intermediate folder when a base intermediate folder was configured.
    string chromIntermediateDir = null;
    if (!string.IsNullOrEmpty(options.GeminiSampleOptions.IntermediateDir))
    {
        chromIntermediateDir = Path.Combine(options.GeminiSampleOptions.IntermediateDir, chrom);
    }

    var sampleOptions = new GeminiSampleOptions();
    sampleOptions.InputBam = options.InputBam;
    sampleOptions.OutputFolder = chromOutputDir;
    sampleOptions.OutputBam = Path.Combine(chromOutputDir, "out.bam");
    sampleOptions.IntermediateDir = chromIntermediateDir;
    sampleOptions.RefId = chromRefId;

    var stitcherOptions = options.StitcherOptions;
    var geminiOptions = options.GeminiOptions;

    // Gemini defaults different than stitcher defaults
    stitcherOptions.NifyUnstitchablePairs = false;

    // Set stitcher pair-filter-level duplicate filtering if skip and remove dups, to save time
    stitcherOptions.FilterDuplicates = geminiOptions.SkipAndRemoveDups;

    var regionsFile = Path.Combine(chromOutputDir, "Regions.txt");
    var sourceFactory = new GeminiDataSourceFactory(
        stitcherOptions,
        geminiOptions.GenomePath,
        geminiOptions.SkipAndRemoveDups,
        chromRefId,
        regionsFile,
        debug: geminiOptions.Debug);
    var outputFactory = new GeminiDataOutputFactory(stitcherOptions.NumThreads);
    var samtools = new SamtoolsWrapper(geminiOptions.SamtoolsPath, geminiOptions.IsWeirdSamtools);

    var workflow = new GeminiWorkflow(
        sourceFactory,
        outputFactory,
        geminiOptions,
        sampleOptions,
        options.RealignmentOptions,
        stitcherOptions,
        options.OutputDirectory,
        options.RealignmentAssessmentOptions,
        options.IndelFilteringOptions,
        samtools);

    // The output folder is created right before the workflow is executed.
    Directory.CreateDirectory(chromOutputDir);
    workflow.Execute();

    Console.WriteLine($"Completed Gemini Workflow for {chrom}");

    taskDirectories.Add(chromOutputDir);
    return Path.Combine(chromOutputDir, "merged.bam.sorted.bam");
}
/// <summary>
/// Aligns the reads in <c>Parameters.Fastqs</c>. When the experiment type is RNA sequencing,
/// a STAR genome index is generated and a two-pass STAR alignment is run. Otherwise
/// (WGS or exome sequencing, per the original documentation) the reads are trimmed with
/// skewer and aligned with Bowtie2.
/// </summary>
public void PerformAlignment()
{
    // 18 max, otherwise it throws a segmentation fault in sorting the BAM files.
    // There's trouble with the number of open files for sorting and stuff, which increases with the
    // number of threads; 18 is the max that works with the default max number of open files.
    int starThreads = Math.Min(18, Parameters.Threads);

    if (Parameters.ExperimentType == ExperimentType.RNASequencing)
    {
        // Alignment preparation: write and run the genome-generate script, blocking until it exits.
        var genomeGenerateScriptPath = WrapperUtility.GetAnalysisScriptPath(Parameters.AnalysisDirectory, "GenomeGenerate.bash");
        var genomeGenerateCommands = STARWrapper.GenerateGenomeIndex(
            Parameters.SpritzDirectory,
            Parameters.Threads,
            Parameters.GenomeStarIndexDirectory,
            new string[] { Parameters.ReorderedFastaPath },
            Parameters.GeneModelGtfOrGffPath,
            Parameters.Fastqs);
        WrapperUtility.GenerateAndRunScript(genomeGenerateScriptPath, genomeGenerateCommands).WaitForExit();

        TwoPassAlignment(starThreads, Parameters.OverwriteStarAlignment);
        return;
    }

    // Non-RNA path: trim every fastq set first and queue the trimmed files for alignment.
    foreach (string[] rawFastqs in Parameters.Fastqs)
    {
        SkewerWrapper.Trim(
            Parameters.SpritzDirectory,
            Parameters.AnalysisDirectory,
            Parameters.Threads,
            19,
            rawFastqs,
            false,
            out string[] trimmedFastqs,
            out string unusedSkewerLog);
        FastqsForAlignment.Add(trimmedFastqs);
    }

    TopHatWrapper.GenerateBowtieIndex(
        Parameters.SpritzDirectory,
        Parameters.AnalysisDirectory,
        Parameters.ReorderedFastaPath,
        out string bowtieIndexPrefix);

    // All commands are accumulated and run as a single script after the loop.
    List<string> scriptCommands = new List<string>();
    scriptCommands.Add("echo \"Aligning reads with bowtie2.\"");

    foreach (string[] fastqsToAlign in FastqsForAlignment)
    {
        // alignment
        var bowtieCommands = TopHatWrapper.Bowtie2Align(
            Parameters.SpritzDirectory,
            Parameters.AnalysisDirectory,
            bowtieIndexPrefix,
            Parameters.Threads,
            fastqsToAlign,
            Parameters.StrandSpecific,
            out string sortedBamPath);
        scriptCommands.AddRange(bowtieCommands);
        scriptCommands.Add(SamtoolsWrapper.IndexBamCommand(sortedBamPath));

        // mark duplicates
        GATKWrapper gatkWrapper = new GATKWrapper(1);
        var prepareCommands = gatkWrapper.PrepareBamAndFasta(
            Parameters.SpritzDirectory,
            Parameters.AnalysisDirectory,
            Parameters.Threads,
            sortedBamPath,
            Parameters.ReorderedFastaPath,
            Parameters.Reference);
        scriptCommands.AddRange(prepareCommands);
        scriptCommands.Add(SamtoolsWrapper.IndexBamCommand(gatkWrapper.PreparedBamPath));

        SortedBamFiles.Add(sortedBamPath);
        DedupedBamFiles.Add(gatkWrapper.PreparedBamPath);
    }

    var bowtieScriptPath = WrapperUtility.GetAnalysisScriptPath(Parameters.AnalysisDirectory, "BowtieAlignment.bash");
    WrapperUtility.GenerateAndRunScript(bowtieScriptPath, scriptCommands).WaitForExit();
}
/// <summary>
/// Copies the input BAM and output locations from the top-level options into the sample
/// options, applies the Gemini-specific stitcher overrides, then builds and executes a
/// <c>GeminiWorkflow</c>.
/// </summary>
/// <remarks>
/// No exceptions are caught here; per the original note, the wrapper that invokes this
/// method is expected to handle all throwing and catching.
/// </remarks>
protected override void ProgramExecution()
{
    var sampleOptions = _options.GeminiSampleOptions;
    sampleOptions.InputBam = _options.InputBam;
    sampleOptions.OutputFolder = _options.OutputDirectory;
    sampleOptions.OutputBam = Path.Combine(_options.OutputDirectory, "out.bam");

    var geminiOptions = _options.GeminiOptions;
    var stitcherOptions = _options.StitcherOptions;

    // The Gemini debug flag is taken from the stitcher options.
    geminiOptions.Debug = stitcherOptions.Debug;

    // Gemini defaults different than stitcher defaults
    stitcherOptions.NifyUnstitchablePairs = false;

    // Set stitcher pair-filter-level duplicate filtering if skip and remove dups, to save time
    stitcherOptions.FilterDuplicates = geminiOptions.SkipAndRemoveDups;

    var regionsFile = Path.Combine(_options.OutputDirectory, "Regions.txt");
    var sourceFactory = new GeminiDataSourceFactory(
        stitcherOptions,
        geminiOptions.GenomePath,
        geminiOptions.SkipAndRemoveDups,
        sampleOptions.RefId,
        regionsFile,
        debug: geminiOptions.Debug);
    var outputFactory = new GeminiDataOutputFactory(stitcherOptions.NumThreads);
    var samtools = new SamtoolsWrapper(geminiOptions.SamtoolsPath, geminiOptions.IsWeirdSamtools);

    var workflow = new GeminiWorkflow(
        sourceFactory,
        outputFactory,
        geminiOptions,
        sampleOptions,
        _options.RealignmentOptions,
        stitcherOptions,
        _options.OutputDirectory,
        _options.RealignmentAssessmentOptions,
        _options.IndelFilteringOptions,
        samtools);

    workflow.Execute();
}