Ejemplo n.º 1
0
        /// <summary>
        /// Reads the reference names from the input BAM, keeps those selected by
        /// <c>_options.Chromosomes</c> (all of them when it is null), and hands the resulting
        /// name-to-index map to the <c>GeminiMultiProcessor</c> together with the filtered command line.
        /// </summary>
        /// <remarks>
        /// No exceptions are caught here: the wrapper should now handle all throwing and catching.
        /// </remarks>
        protected override void ProgramExecution()
        {
            var optionsUsed = _appOptionParser.ParsingResult.OptionsUsed;

            // Arguments that are left out of the command line built below.
            var doNotPassToSubprocess = new List<string>()
            {
                "outFolder", "numProcesses", "exePath", "intermediateDir", "multiprocess", "chromosomes"
            };

            var cmdLineList = MultiProcessHelpers.GetCommandLineWithoutIgnoredArguments(optionsUsed, doNotPassToSubprocess);

            // Build the chromosome filter once, instead of copying the collection to a new list
            // (and scanning it linearly) for every reference name in the BAM header.
            // A null filter means "keep every reference".
            var requestedChromosomes = _options.Chromosomes == null
                ? null
                : new HashSet<string>(_options.Chromosomes);

            var refNameMapping = new Dictionary<string, int>();

            using (var bamReader = new BamReader(_options.InputBam))
            {
                foreach (var referenceName in bamReader.GetReferenceNames())
                {
                    if (requestedChromosomes != null && !requestedChromosomes.Contains(referenceName))
                    {
                        continue;
                    }

                    refNameMapping.Add(referenceName, bamReader.GetReferenceIndex(referenceName));
                }
            }

            var taskManager     = new CliTaskManager(_options.NumProcesses);
            var geminiProcessor = new GeminiMultiProcessor(_options, new CliTaskCreator());

            var samtoolsWrapper = new SamtoolsWrapper(_options.GeminiOptions.SamtoolsPath, _options.GeminiOptions.IsWeirdSamtools);

            geminiProcessor.Execute(taskManager, refNameMapping, cmdLineList, samtoolsWrapper);
        }
        /// <summary>
        /// Runs a Gemini workflow in-process for a single chromosome, using a per-chromosome
        /// output directory located under <paramref name="outMultiPath"/>.
        /// </summary>
        /// <param name="chromRefIds">Lookup from chromosome name to reference id; must contain <paramref name="chrom"/>.</param>
        /// <param name="outMultiPath">Parent directory under which the per-chromosome directory is created.</param>
        /// <param name="taskDirectories">Receives the per-chromosome output directory after the workflow has executed.</param>
        /// <param name="chrom">Name of the chromosome to process.</param>
        /// <param name="options">Run options. Its <c>StitcherOptions</c> are modified in place by this method.</param>
        /// <returns>
        /// The path <c>merged.bam.sorted.bam</c> inside the per-chromosome output directory
        /// (presumably produced by the workflow; this method does not check that it exists).
        /// </returns>
        private static string ProcessChromosome(Dictionary<string, int> chromRefIds, string outMultiPath, List<string> taskDirectories,
                                                string chrom, GeminiMultiOptions options)
        {
            // TODO either officially deprecate non-multiprocess-processing and remove this, or consolidate this with the Gemini calling code from Gemini/Program.cs

            var chromOutputDir = Path.Combine(outMultiPath, chrom);
            var chromRefId     = chromRefIds[chrom];

            // Only derive a per-chromosome intermediate directory when a base one was configured.
            string chromIntermediateDir = null;
            if (!string.IsNullOrEmpty(options.GeminiSampleOptions.IntermediateDir))
            {
                chromIntermediateDir = Path.Combine(options.GeminiSampleOptions.IntermediateDir, chrom);
            }

            var sampleOptions = new GeminiSampleOptions
            {
                InputBam        = options.InputBam,
                OutputFolder    = chromOutputDir,
                OutputBam       = Path.Combine(chromOutputDir, "out.bam"),
                IntermediateDir = chromIntermediateDir,
                RefId           = chromRefId
            };

            // Gemini defaults differ from the stitcher defaults.
            options.StitcherOptions.NifyUnstitchablePairs = false;

            // When skipping and removing duplicates, also filter them at the stitcher pair-filter level, to save time.
            var geminiOptions = options.GeminiOptions;
            options.StitcherOptions.FilterDuplicates = geminiOptions.SkipAndRemoveDups;

            var regionsFile = Path.Combine(chromOutputDir, "Regions.txt");
            var sourceFactory = new GeminiDataSourceFactory(
                options.StitcherOptions,
                geminiOptions.GenomePath,
                geminiOptions.SkipAndRemoveDups,
                chromRefId,
                regionsFile,
                debug: geminiOptions.Debug);
            var outputFactory = new GeminiDataOutputFactory(options.StitcherOptions.NumThreads);
            var samtools      = new SamtoolsWrapper(geminiOptions.SamtoolsPath, geminiOptions.IsWeirdSamtools);

            var workflow = new GeminiWorkflow(
                sourceFactory,
                outputFactory,
                geminiOptions,
                sampleOptions,
                options.RealignmentOptions,
                options.StitcherOptions,
                options.OutputDirectory,
                options.RealignmentAssessmentOptions,
                options.IndelFilteringOptions,
                samtools);

            // The output directory is created only after the workflow object has been constructed.
            Directory.CreateDirectory(chromOutputDir);
            workflow.Execute();

            Console.WriteLine($"Completed Gemini Workflow for {chrom}");

            var sortedMergedBam = Path.Combine(chromOutputDir, "merged.bam.sorted.bam");

            taskDirectories.Add(chromOutputDir);
            return sortedMergedBam;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Aligns the configured fastq files. For RNA sequencing experiments this generates the STAR
        /// genome index and runs a two-pass STAR alignment; for any other experiment type
        /// (e.g. WGS or exome sequencing) it trims the reads and performs a Bowtie2 alignment.
        /// </summary>
        public void PerformAlignment()
        {
            // Capped at 18: more threads cause a segmentation fault while sorting the BAM files
            int cappedStarThreads = Math.Min(18, Parameters.Threads);

            if (Parameters.ExperimentType == ExperimentType.RNASequencing)
            {
                // Alignment preparation: build the genome index and wait for the script to finish
                var genomeGenerateScript = WrapperUtility.GetAnalysisScriptPath(Parameters.AnalysisDirectory, "GenomeGenerate.bash");
                var genomeIndexCommands = STARWrapper.GenerateGenomeIndex(
                    Parameters.SpritzDirectory,
                    Parameters.Threads,
                    Parameters.GenomeStarIndexDirectory,
                    new string[] { Parameters.ReorderedFastaPath },
                    Parameters.GeneModelGtfOrGffPath,
                    Parameters.Fastqs);
                WrapperUtility.GenerateAndRunScript(genomeGenerateScript, genomeIndexCommands).WaitForExit();

                // The number of open files needed for sorting grows with the number of threads;
                // 18 is the most that works with the default limit on open files
                TwoPassAlignment(cappedStarThreads, Parameters.OverwriteStarAlignment);
                return;
            }

            // Trim every fastq set before aligning
            foreach (string[] rawFastqs in Parameters.Fastqs)
            {
                SkewerWrapper.Trim(Parameters.SpritzDirectory, Parameters.AnalysisDirectory, Parameters.Threads, 19, rawFastqs, false, out string[] trimmed, out string unusedSkewerLog);
                FastqsForAlignment.Add(trimmed);
            }

            TopHatWrapper.GenerateBowtieIndex(Parameters.SpritzDirectory, Parameters.AnalysisDirectory, Parameters.ReorderedFastaPath, out string indexPrefix);

            // All commands are collected into one script that is run once at the end
            var scriptLines = new List<string>();
            scriptLines.Add("echo \"Aligning reads with bowtie2.\"");

            foreach (string[] trimmedFastqs in FastqsForAlignment)
            {
                // alignment, followed by indexing of the sorted BAM
                var bowtieCommands = TopHatWrapper.Bowtie2Align(
                    Parameters.SpritzDirectory,
                    Parameters.AnalysisDirectory,
                    indexPrefix,
                    Parameters.Threads,
                    trimmedFastqs,
                    Parameters.StrandSpecific,
                    out string sortedBam);
                scriptLines.AddRange(bowtieCommands);
                scriptLines.Add(SamtoolsWrapper.IndexBamCommand(sortedBam));

                // mark duplicates, followed by indexing of the prepared BAM
                var gatkWrapper = new GATKWrapper(1);
                var prepareCommands = gatkWrapper.PrepareBamAndFasta(
                    Parameters.SpritzDirectory,
                    Parameters.AnalysisDirectory,
                    Parameters.Threads,
                    sortedBam,
                    Parameters.ReorderedFastaPath,
                    Parameters.Reference);
                scriptLines.AddRange(prepareCommands);
                scriptLines.Add(SamtoolsWrapper.IndexBamCommand(gatkWrapper.PreparedBamPath));

                SortedBamFiles.Add(sortedBam);
                DedupedBamFiles.Add(gatkWrapper.PreparedBamPath);
            }

            var bowtieScript = WrapperUtility.GetAnalysisScriptPath(Parameters.AnalysisDirectory, "BowtieAlignment.bash");
            WrapperUtility.GenerateAndRunScript(bowtieScript, scriptLines).WaitForExit();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Copies the top-level input/output settings into the Gemini sample options, aligns a few
        /// Gemini and stitcher settings, then builds and executes a single <c>GeminiWorkflow</c>.
        /// </summary>
        /// <remarks>
        /// No exceptions are caught here: the wrapper should now handle all throwing and catching.
        /// </remarks>
        protected override void ProgramExecution()
        {
            var outputDirectory = _options.OutputDirectory;

            _options.GeminiSampleOptions.InputBam     = _options.InputBam;
            _options.GeminiSampleOptions.OutputFolder = outputDirectory;
            _options.GeminiSampleOptions.OutputBam    = Path.Combine(outputDirectory, "out.bam");
            _options.GeminiOptions.Debug              = _options.StitcherOptions.Debug;

            // Gemini defaults differ from the stitcher defaults.
            _options.StitcherOptions.NifyUnstitchablePairs = false;

            // When skipping and removing duplicates, also filter them at the stitcher pair-filter level, to save time.
            _options.StitcherOptions.FilterDuplicates = _options.GeminiOptions.SkipAndRemoveDups;

            var regionsFile = Path.Combine(outputDirectory, "Regions.txt");
            var sourceFactory = new GeminiDataSourceFactory(
                _options.StitcherOptions,
                _options.GeminiOptions.GenomePath,
                _options.GeminiOptions.SkipAndRemoveDups,
                _options.GeminiSampleOptions.RefId,
                regionsFile,
                debug: _options.GeminiOptions.Debug);
            var outputFactory = new GeminiDataOutputFactory(_options.StitcherOptions.NumThreads);

            var samtools = new SamtoolsWrapper(_options.GeminiOptions.SamtoolsPath, _options.GeminiOptions.IsWeirdSamtools);

            var workflow = new GeminiWorkflow(
                sourceFactory,
                outputFactory,
                _options.GeminiOptions,
                _options.GeminiSampleOptions,
                _options.RealignmentOptions,
                _options.StitcherOptions,
                outputDirectory,
                _options.RealignmentAssessmentOptions,
                _options.IndelFilteringOptions,
                samtools);

            workflow.Execute();
        }