Ejemplo n.º 1
0
        /// <summary>
        /// Builds the bash commands that run StringTie transcript reconstruction on a BAM file, guided by a gene model.
        /// The BAM header is first checked for "SO:coordinate"; when that marker is absent, a sort command is emitted and the
        /// ".sorted.bam" file next to the input is used instead. StringTie itself is only run when the output GTF is missing or empty.
        /// </summary>
        /// <param name="spritzDirectory">Directory handed to WrapperUtility.ChangeToToolsDirectoryCommand for the first command.</param>
        /// <param name="threads">Thread count passed to the sort command and to StringTie (-p).</param>
        /// <param name="bamPath">Alignment file to assemble; the intermediate and output files are placed in its directory.</param>
        /// <param name="geneModelGtfOrGffPath">Reference annotation passed to StringTie (-G).</param>
        /// <param name="genome">Genome passed to BAMProperties; only used when <paramref name="inferStrandSpecificity"/> is true.</param>
        /// <param name="strandSpecific">Strandedness to use when it is not inferred from the BAM file.</param>
        /// <param name="inferStrandSpecificity">When true, the strandedness reported by BAMProperties replaces <paramref name="strandSpecific"/>.</param>
        /// <param name="outputTranscriptGtfPath">Receives the path of the GTF that StringTie writes: the BAM path with its extension replaced by ".gtf".</param>
        /// <returns>The bash command lines, in the order they should be executed.</returns>
        public static List <string> AssembleTranscripts(string spritzDirectory, int threads, string bamPath, string geneModelGtfOrGffPath, Genome genome,
                                                        Strandedness strandSpecific, bool inferStrandSpecificity, out string outputTranscriptGtfPath)
        {
            Strandedness strandedness = strandSpecific;

            if (inferStrandSpecificity)
            {
                // NOTE(review): 0.8 is presumably the fraction threshold used for the strandedness call -- confirm against BAMProperties.
                BAMProperties bamProperties = new BAMProperties(bamPath, geneModelGtfOrGffPath, genome, 0.8);
                strandedness = bamProperties.Strandedness;
            }

            string bamDirectory = Path.GetDirectoryName(bamPath);
            string bamBaseName  = Path.GetFileNameWithoutExtension(bamPath);

            // Holds the "SO:coordinate" header line if there is one; an empty file means the BAM is not marked as coordinate-sorted.
            string sortedCheckPath = Path.Combine(bamDirectory, bamBaseName + ".cufflinksSortCheck");
            string sortedBamPath   = Path.Combine(bamDirectory, bamBaseName + ".sorted.bam");

            outputTranscriptGtfPath = Path.Combine(bamDirectory, bamBaseName + ".gtf");

            // Each option needs its own leading space; without it the flag is glued onto the preceding output path.
            string strandOption = "";
            if (strandedness == Strandedness.Forward)
            {
                strandOption = " --fr";
            }
            else if (strandedness != Strandedness.None)
            {
                strandOption = " --rf";
            }

            return(new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "samtools view -H " + WrapperUtility.ConvertWindowsPath(bamPath) + " | grep SO:coordinate > " + WrapperUtility.ConvertWindowsPath(sortedCheckPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then " + SamtoolsWrapper.SortBam(bamPath, threads) + "; fi",
                "bam=" + WrapperUtility.ConvertWindowsPath(bamPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then bam=" + WrapperUtility.ConvertWindowsPath(sortedBamPath) + "; fi",
                "if [[ ! -f " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) + " || ! -s " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) + " ]]; then",
                "  echo \"Performing stringtie transcript reconstruction on " + bamPath + "\"",
                "  stringtie $bam " +
                " -p " + threads.ToString() +
                " -G " + WrapperUtility.ConvertWindowsPath(geneModelGtfOrGffPath) +
                " -o " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) +
                strandOption,
                "fi",
            });
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Transcript assembly with Cufflinks. Note that fragment bias estimation (--frag-bias-correct) and multi-read rescuing (--multi-read-correct) are not used.
        /// These take a lot of time, and they only provide better abundance estimates, which we use RSEM for.
        /// The BAM header is first checked for "SO:coordinate"; when that marker is absent, a sort command is emitted and the
        /// ".sorted.bam" file next to the input is used instead. Cufflinks is only run when the transcripts file is missing or empty.
        /// </summary>
        /// <param name="spritzDirectory">Directory handed to WrapperUtility.ChangeToToolsDirectoryCommand for the first command.</param>
        /// <param name="analysisDirectory">Directory handed to WrapperUtility.GetAnalysisScriptPath.</param>
        /// <param name="threads">Thread count passed to the sort command and to Cufflinks (--num-threads).</param>
        /// <param name="bamPath">Alignment file to assemble; the intermediate and output files are placed in its directory.</param>
        /// <param name="geneModelGtfOrGffPath">Reference annotation passed to Cufflinks (--GTF-guide).</param>
        /// <param name="genome">Genome passed to BAMProperties; only used when <paramref name="inferStrandSpecificity"/> is true.</param>
        /// <param name="strandSpecific">Whether to treat the library as stranded when this is not inferred from the BAM file.</param>
        /// <param name="inferStrandSpecificity">When true, the library is treated as stranded if BAMProperties reports anything other than Strandedness.None.</param>
        /// <param name="outputDirectory">Receives the Cufflinks output directory: the BAM path with its extension replaced by ".cufflinksOutput".</param>
        /// <returns>The bash command lines, in the order they should be executed.</returns>
        public static List <string> AssembleTranscripts(string spritzDirectory, string analysisDirectory, int threads, string bamPath, string geneModelGtfOrGffPath, Genome genome, bool strandSpecific, bool inferStrandSpecificity, out string outputDirectory)
        {
            bool isStranded = strandSpecific;

            if (inferStrandSpecificity)
            {
                // NOTE(review): 0.8 is presumably the fraction threshold used for the strandedness call -- confirm against BAMProperties.
                BAMProperties bamProperties = new BAMProperties(bamPath, geneModelGtfOrGffPath, genome, 0.8);
                isStranded = bamProperties.Strandedness != Strandedness.None;
            }

            string bamDirectory = Path.GetDirectoryName(bamPath);
            string bamBaseName  = Path.GetFileNameWithoutExtension(bamPath);

            // Holds the "SO:coordinate" header line if there is one; an empty file means the BAM is not marked as coordinate-sorted.
            string sortedCheckPath = Path.Combine(bamDirectory, bamBaseName + ".cufflinksSortCheck");
            string sortedBamPath   = Path.Combine(bamDirectory, bamBaseName + ".sorted.bam");

            outputDirectory = Path.Combine(bamDirectory, bamBaseName + ".cufflinksOutput");

            // NOTE(review): the result is not used in the returned commands; the call is kept because any side effects
            // of GetAnalysisScriptPath are not visible from here.
            string script_name = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "CufflinksRun.bash");

            string transcriptsPath = WrapperUtility.ConvertWindowsPath(Path.Combine(outputDirectory, TranscriptsFilename));

            // The option needs its own leading space; without it the flag is glued onto the output directory path.
            string libraryTypeOption = isStranded ? " --library-type fr-firststrand" : "";

            return(new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "samtools view -H " + WrapperUtility.ConvertWindowsPath(bamPath) + " | grep SO:coordinate > " + WrapperUtility.ConvertWindowsPath(sortedCheckPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then " + SamtoolsWrapper.SortBam(bamPath, threads) + "; fi",
                "bam=" + WrapperUtility.ConvertWindowsPath(bamPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then bam=" + WrapperUtility.ConvertWindowsPath(sortedBamPath) + "; fi",
                "if [[ ! -f " + transcriptsPath + " || ! -s " + transcriptsPath + " ]]; then " +
                "cufflinks-2.2.1/cufflinks " +
                " --num-threads " + threads.ToString() +
                " --GTF-guide " + WrapperUtility.ConvertWindowsPath(geneModelGtfOrGffPath) +
                " --output-dir " + WrapperUtility.ConvertWindowsPath(outputDirectory) +
                libraryTypeOption +
                " $bam" +
                "; fi",
            });
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Gets the bash commands that run rsem-calculate-expression against an RSEM reference.
        /// Compressed fastq files (detected from the first entry's suffix) are decompressed with --keep first, and the
        /// decompressed paths are used as input. Also sets <c>OutputPrefix</c>, which is derived from the first fastq path
        /// and the hash code of <paramref name="referencePrefix"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <paramref name="strandedness"/> is accepted but is not forwarded to rsem-calculate-expression
        /// by this method; confirm whether a strandedness option should be added to the command.
        /// </remarks>
        /// <param name="spritzDirectory">Directory handed to WrapperUtility.ChangeToToolsDirectoryCommand and to GetAlignerOption.</param>
        /// <param name="referencePrefix">RSEM reference prefix passed on the command line.</param>
        /// <param name="threads">Thread count passed as --num-threads and to the BAM sort command.</param>
        /// <param name="aligner">Aligner selection, translated to command-line text by GetAlignerOption.</param>
        /// <param name="strandedness">Currently unused; see remarks.</param>
        /// <param name="fastqPaths">One entry (single-end) or two entries (paired-end); each entry may be a comma-separated list of files. The array is not modified.</param>
        /// <param name="doOuptutBam">When true, --output-genome-bam is used and sort/index commands for the genome BAM are appended; otherwise --no-bam-output is used.</param>
        /// <returns>The bash command lines, in the order they should be executed.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fastqPaths"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="fastqPaths"/> has fewer than one or more than two entries.</exception>
        public List <string> CalculateExpressionCommands(string spritzDirectory, string referencePrefix, int threads, RSEMAlignerOption aligner, Strandedness strandedness,
                                                         string[] fastqPaths, bool doOuptutBam)
        {
            if (fastqPaths == null)
            {
                throw new ArgumentNullException(nameof(fastqPaths));
            }
            // The single-string constructor of ArgumentOutOfRangeException takes a parameter name, not a message,
            // so the two-argument overload is used to keep the text as the message.
            if (fastqPaths.Length < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(fastqPaths), "No fastq files were given for RSEM calculate expression.");
            }
            if (fastqPaths.Length > 2)
            {
                throw new ArgumentOutOfRangeException(nameof(fastqPaths), "Too many fastq file types given for RSEM calculate expression.");
            }

            List <string> scriptCommands = new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "cd RSEM-1.3.0",
            };

            // Work on a copy: the paths are rewritten below and the caller's array must stay untouched.
            string[] analysisFastqPaths = (string[])fastqPaths.Clone();
            string   alignerOption      = GetAlignerOption(spritzDirectory, aligner);
            string   threadOption       = "--num-threads " + threads.ToString();

            // Decompress files if needed
            // The '--star-gzipped-read-file' and '--star-bzipped-read-file' options work, but then the rest of RSEM doesn't when using compressed files...
            bool fastqIsGunzipped = analysisFastqPaths[0].EndsWith("gz", StringComparison.Ordinal);
            bool fastqIsBunzipped = analysisFastqPaths[0].EndsWith("bz2", StringComparison.Ordinal) || analysisFastqPaths[0].EndsWith("bz", StringComparison.Ordinal);

            if (fastqIsGunzipped || fastqIsBunzipped)
            {
                string decompressionCommand = fastqIsGunzipped ? "gunzip" : "bunzip2";
                for (int i = 0; i < analysisFastqPaths.Length; i++)
                {
                    // An entry may hold several comma-separated files; each one needs its own command and its own extension removed.
                    string[] files = analysisFastqPaths[i].Split(',');
                    for (int j = 0; j < files.Length; j++)
                    {
                        scriptCommands.Add($"{decompressionCommand} --keep {WrapperUtility.ConvertWindowsPath(files[j])}");
                        files[j] = Path.ChangeExtension(files[j], null);
                    }
                    analysisFastqPaths[i] = string.Join(",", files);
                }
            }

            // Converts every file of a comma-separated list and joins them back together.
            Func <string, string> convertList = list => string.Join(",", list.Split(',').Select(f => WrapperUtility.ConvertWindowsPath(f)));

            string inputOption = analysisFastqPaths.Length == 1 ?
                                 convertList(analysisFastqPaths[0]) :
                                 "--paired-end " + convertList(analysisFastqPaths[0]) + " " + convertList(analysisFastqPaths[1]);

            string bamOption = doOuptutBam ? "--output-genome-bam" : "--no-bam-output";

            // TrimStart instead of Substring(1) so that a path without an extension yields an empty tag rather than throwing.
            // NOTE(review): string.GetHashCode() may differ between processes on some runtimes, which would change this prefix
            // from run to run and defeat the "already computed" checks below -- confirm on the target runtime.
            string firstFastq = analysisFastqPaths[0].Split(',')[0];
            OutputPrefix = Path.Combine(Path.GetDirectoryName(firstFastq),
                                        Path.GetFileNameWithoutExtension(firstFastq) +
                                        "_" + Path.GetExtension(firstFastq).TrimStart('.').ToUpperInvariant() +
                                        referencePrefix.GetHashCode().ToString());

            string sortedGenomeBam = WrapperUtility.ConvertWindowsPath(OutputPrefix + GenomeSortedBamSuffix);
            string isoformResults  = WrapperUtility.ConvertWindowsPath(OutputPrefix + IsoformResultsSuffix);

            // RSEM likes to sort the transcript.bam file, which takes forever and isn't very useful, I've found. Just sort the genome.bam file instead
            // The guard uses "||" so that a missing file OR an empty one (e.g. left behind by a failed run) triggers the work.
            string samtoolsCommands = !doOuptutBam ?
                                      "" :
                                      "if [[ ! -f " + sortedGenomeBam + " || ! -s " + sortedGenomeBam + " ]]; then\n" +
                                      "  " + SamtoolsWrapper.SortBam(OutputPrefix + GenomeBamSuffix, threads) + "\n" +
                                      "  " + SamtoolsWrapper.IndexBamCommand(OutputPrefix + GenomeSortedBamSuffix) + "\n" +
                                      "fi";

            // construct the commands
            scriptCommands.AddRange(new List <string>
            {
                "if [[ ! -f " + isoformResults + " || ! -s " + isoformResults + " ]]; then " +
                "./rsem-calculate-expression " +
                "--time " +         // include timed results
                "--calc-ci " +      // posterior calculation of 95% confidence intervals
                alignerOption + " " +
                threadOption + " " +
                bamOption + " " +
                inputOption + " " +
                WrapperUtility.ConvertWindowsPath(referencePrefix) + " " +
                WrapperUtility.ConvertWindowsPath(OutputPrefix) +
                "; fi",
                samtoolsCommands
            });
            return(scriptCommands);
        }