/// <summary>
/// Builds the shell commands that run StringTie transcript reconstruction on a BAM file.
/// The commands check the BAM header for coordinate sorting (sorting the file when the check
/// comes back empty) and only invoke StringTie when the output GTF is missing or empty.
/// </summary>
/// <param name="spritzDirectory">Passed to <c>WrapperUtility.ChangeToToolsDirectoryCommand</c> to produce the first command.</param>
/// <param name="threads">Thread count passed to StringTie (<c>-p</c>) and to the BAM sort command.</param>
/// <param name="bamPath">Alignment file to assemble; the sort-check file and the GTF are placed next to it.</param>
/// <param name="geneModelGtfOrGffPath">Reference annotation passed to StringTie (<c>-G</c>).</param>
/// <param name="genome">Genome handed to <c>BAMProperties</c> when strandedness is inferred.</param>
/// <param name="strandSpecific">Strandedness to use when it is not inferred from the BAM file.</param>
/// <param name="inferStrandSpecificity">When true, the strandedness reported by <c>BAMProperties</c> replaces <paramref name="strandSpecific"/>.</param>
/// <param name="outputTranscriptGtfPath">Receives the output path: the BAM path with its extension replaced by <c>.gtf</c>.</param>
/// <returns>The shell commands, in the order they should be executed.</returns>
public static List<string> AssembleTranscripts(string spritzDirectory, int threads, string bamPath, string geneModelGtfOrGffPath, Genome genome, Strandedness strandSpecific, bool inferStrandSpecificity, out string outputTranscriptGtfPath)
{
    Strandedness strandedness = strandSpecific;
    if (inferStrandSpecificity)
    {
        // NOTE(review): 0.8 is passed straight through to BAMProperties; its meaning is defined there.
        BAMProperties bamProperties = new BAMProperties(bamPath, geneModelGtfOrGffPath, genome, 0.8);
        strandedness = bamProperties.Strandedness;
    }

    string bamDirectory = Path.GetDirectoryName(bamPath);
    string bamBaseName = Path.GetFileNameWithoutExtension(bamPath);
    string sortedCheckPath = Path.Combine(bamDirectory, bamBaseName + ".cufflinksSortCheck");
    string sortedBamPath = Path.Combine(bamDirectory, bamBaseName + ".sorted.bam");
    outputTranscriptGtfPath = Path.Combine(bamDirectory, bamBaseName + ".gtf");

    // The leading space is required: without it the flag is glued onto the "-o" output path,
    // so StringTie never sees the option and writes to a wrongly named file.
    string strandOption;
    if (strandedness == Strandedness.None)
    {
        strandOption = "";
    }
    else if (strandedness == Strandedness.Forward)
    {
        strandOption = " --fr";
    }
    else
    {
        strandOption = " --rf";
    }

    return new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),

        // An empty check file means the header did not report coordinate sorting.
        "samtools view -H " + WrapperUtility.ConvertWindowsPath(bamPath) + " | grep SO:coordinate > " + WrapperUtility.ConvertWindowsPath(sortedCheckPath),
        "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then " + SamtoolsWrapper.SortBam(bamPath, threads) + "; fi",

        // Use the original BAM unless it had to be sorted, in which case switch to the .sorted.bam file.
        "bam=" + WrapperUtility.ConvertWindowsPath(bamPath),
        "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then bam=" + WrapperUtility.ConvertWindowsPath(sortedBamPath) + "; fi",

        // Skip the assembly when a non-empty GTF already exists.
        "if [[ ! -f " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) + " || ! -s " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) + " ]]; then",
        " echo \"Performing stringtie transcript reconstruction on " + bamPath + "\"",
        " stringtie $bam " +
            " -p " + threads.ToString() +
            " -G " + WrapperUtility.ConvertWindowsPath(geneModelGtfOrGffPath) +
            " -o " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) +
            strandOption,
        "fi",
    };
}
/// <summary>
/// Builds the shell commands that run Cufflinks transcript assembly on a BAM file.
/// Note that fragment bias estimation (--frag-bias-correct) and multi-read rescuing (--multi-read-correct) are not used.
/// These take a lot of time, and they only provide better abundance estimates, which we use RSEM for.
/// </summary>
/// <param name="spritzDirectory">Passed to <c>WrapperUtility.ChangeToToolsDirectoryCommand</c> to produce the first command.</param>
/// <param name="analysisDirectory">Passed to <c>WrapperUtility.GetAnalysisScriptPath</c>; not otherwise used by this method.</param>
/// <param name="threads">Thread count passed to Cufflinks (<c>--num-threads</c>) and to the BAM sort command.</param>
/// <param name="bamPath">Alignment file to assemble; the sort-check file and output directory are placed next to it.</param>
/// <param name="geneModelGtfOrGffPath">Reference annotation passed to Cufflinks (<c>--GTF-guide</c>).</param>
/// <param name="genome">Genome handed to <c>BAMProperties</c> when strandedness is inferred.</param>
/// <param name="strandSpecific">Whether to treat the library as stranded when strandedness is not inferred.</param>
/// <param name="inferStrandSpecificity">When true, the library is treated as stranded whenever <c>BAMProperties</c> reports anything other than <c>Strandedness.None</c>.</param>
/// <param name="outputDirectory">Receives the output directory: the BAM path with its extension replaced by <c>.cufflinksOutput</c>.</param>
/// <returns>The shell commands, in the order they should be executed.</returns>
public static List<string> AssembleTranscripts(string spritzDirectory, string analysisDirectory, int threads, string bamPath, string geneModelGtfOrGffPath, Genome genome, bool strandSpecific, bool inferStrandSpecificity, out string outputDirectory)
{
    bool isStranded = strandSpecific;
    if (inferStrandSpecificity)
    {
        // NOTE(review): 0.8 is passed straight through to BAMProperties; its meaning is defined there.
        BAMProperties bamProperties = new BAMProperties(bamPath, geneModelGtfOrGffPath, genome, 0.8);
        isStranded = bamProperties.Strandedness != Strandedness.None;
    }

    string bamDirectory = Path.GetDirectoryName(bamPath);
    string bamBaseName = Path.GetFileNameWithoutExtension(bamPath);
    string sortedCheckPath = Path.Combine(bamDirectory, bamBaseName + ".cufflinksSortCheck");
    string sortedBamPath = Path.Combine(bamDirectory, bamBaseName + ".sorted.bam");
    outputDirectory = Path.Combine(bamDirectory, bamBaseName + ".cufflinksOutput");

    // NOTE(review): the returned script path is never used in this method. The call is kept in case
    // GetAnalysisScriptPath has side effects; confirm against WrapperUtility and remove if it does not.
    string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "CufflinksRun.bash");

    string transcriptsPath = WrapperUtility.ConvertWindowsPath(Path.Combine(outputDirectory, TranscriptsFilename));

    // The leading space is required: without it the option is glued onto the "--output-dir" path,
    // so Cufflinks never sees the library type and writes to a wrongly named directory.
    string libraryTypeOption = isStranded ? " --library-type fr-firststrand" : "";

    return new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),

        // An empty check file means the header did not report coordinate sorting.
        "samtools view -H " + WrapperUtility.ConvertWindowsPath(bamPath) + " | grep SO:coordinate > " + WrapperUtility.ConvertWindowsPath(sortedCheckPath),
        "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then " + SamtoolsWrapper.SortBam(bamPath, threads) + "; fi",

        // Use the original BAM unless it had to be sorted, in which case switch to the .sorted.bam file.
        "bam=" + WrapperUtility.ConvertWindowsPath(bamPath),
        "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then bam=" + WrapperUtility.ConvertWindowsPath(sortedBamPath) + "; fi",

        // Skip the assembly when a non-empty transcripts file already exists.
        "if [[ ! -f " + transcriptsPath + " || ! -s " + transcriptsPath + " ]]; then " +
            "cufflinks-2.2.1/cufflinks " +
            " --num-threads " + threads.ToString() +
            " --GTF-guide " + WrapperUtility.ConvertWindowsPath(geneModelGtfOrGffPath) +
            " --output-dir " + WrapperUtility.ConvertWindowsPath(outputDirectory) +
            libraryTypeOption +
            " $bam" +
            "; fi",
    };
}
/// <summary>
/// Gets the shell commands that calculate expression against an RSEM reference with
/// <c>rsem-calculate-expression</c>, decompressing gzip/bzip2 fastq files first and, when a genome
/// BAM is requested, sorting and indexing it afterwards. Also sets <c>OutputPrefix</c>.
/// </summary>
/// <param name="spritzDirectory">Passed to <c>WrapperUtility.ChangeToToolsDirectoryCommand</c> and <c>GetAlignerOption</c>.</param>
/// <param name="referencePrefix">Prefix of the RSEM reference; its hash code also becomes part of the output prefix.</param>
/// <param name="threads">Thread count passed to RSEM (<c>--num-threads</c>) and to the BAM sort command.</param>
/// <param name="aligner">Aligner selection, translated by <c>GetAlignerOption</c>.</param>
/// <param name="strandedness">Library strandedness, passed to RSEM as <c>--strandedness</c> using the lower-cased enum name.</param>
/// <param name="fastqPaths">One entry (single-end) or two entries (paired-end); each entry may be a comma-separated list of files. The array is not modified.</param>
/// <param name="doOuptutBam">When true, RSEM emits a genome BAM, which is then sorted and indexed.</param>
/// <returns>The shell commands, in the order they should be executed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fastqPaths"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="fastqPaths"/> has no entries or more than two.</exception>
public List<string> CalculateExpressionCommands(string spritzDirectory, string referencePrefix, int threads, RSEMAlignerOption aligner, Strandedness strandedness, string[] fastqPaths, bool doOuptutBam)
{
    if (fastqPaths == null)
    {
        throw new ArgumentNullException(nameof(fastqPaths));
    }
    // The single-string constructor of ArgumentOutOfRangeException takes a parameter name,
    // so the (paramName, message) overload is used to keep the message a message.
    if (fastqPaths.Length < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(fastqPaths), "No fastq files were given for RSEM calculate expression.");
    }
    if (fastqPaths.Length > 2)
    {
        throw new ArgumentOutOfRangeException(nameof(fastqPaths), "Too many fastq file types given for RSEM calculate expression.");
    }

    List<string> scriptCommands = new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
        "cd RSEM-1.3.0",
    };

    // Work on a copy so that stripping compression extensions does not rewrite the caller's array.
    string[] analysisFastqPaths = (string[])fastqPaths.Clone();
    string alignerOption = GetAlignerOption(spritzDirectory, aligner);
    string threadOption = "--num-threads " + threads.ToString();
    // NOTE(review): assumes the Strandedness member names lower-case to values RSEM accepts
    // (none / forward / reverse) — confirm against the enum declaration.
    string strandOption = "--strandedness " + strandedness.ToString().ToLowerInvariant();

    // Decompress files if needed
    // The '--star-gzipped-read-file' and '--star-bzipped-read-file' options work, but then the rest of RSEM doesn't when using compressed files...
    bool fastqIsGunzipped = analysisFastqPaths[0].EndsWith("gz", StringComparison.Ordinal);
    bool fastqIsBunzipped = analysisFastqPaths[0].EndsWith("bz2", StringComparison.Ordinal)
        || analysisFastqPaths[0].EndsWith("bz", StringComparison.Ordinal);
    if (fastqIsGunzipped || fastqIsBunzipped)
    {
        string decompressionCommand = fastqIsGunzipped ? "gunzip" : "bunzip2";
        for (int i = 0; i < analysisFastqPaths.Length; i++)
        {
            // Each entry may be a comma-separated list (see inputOption below), so every file
            // is decompressed and renamed on its own.
            string[] files = analysisFastqPaths[i].Split(',');
            for (int j = 0; j < files.Length; j++)
            {
                scriptCommands.Add($"{decompressionCommand} --keep {WrapperUtility.ConvertWindowsPath(files[j])}");
                files[j] = Path.ChangeExtension(files[j], null);
            }
            analysisFastqPaths[i] = string.Join(",", files);
        }
    }

    // Converts every path of a comma-separated list and joins the list back together.
    Func<string, string> convertPathList = list =>
        string.Join(",", list.Split(',').Select(f => WrapperUtility.ConvertWindowsPath(f)));

    string inputOption = analysisFastqPaths.Length == 1
        ? convertPathList(analysisFastqPaths[0])
        : "--paired-end " + convertPathList(analysisFastqPaths[0]) + " " + convertPathList(analysisFastqPaths[1]);

    string bamOption = doOuptutBam ? "--output-genome-bam" : "--no-bam-output";

    // The output prefix sits next to the first fastq file: <name>_<EXTENSION><hash of referencePrefix>.
    // TrimStart tolerates a file without an extension, where Substring(1) would throw.
    // NOTE(review): string.GetHashCode() is not guaranteed to be stable across processes or runtimes,
    // so the prefix (and the skip-if-exists checks below) may differ between runs — confirm this is acceptable.
    string firstFastqPath = analysisFastqPaths[0].Split(',')[0];
    string extensionTag = Path.GetExtension(firstFastqPath).TrimStart('.').ToUpperInvariant();
    OutputPrefix = Path.Combine(
        Path.GetDirectoryName(firstFastqPath),
        Path.GetFileNameWithoutExtension(firstFastqPath) + "_" + extensionTag + referencePrefix.GetHashCode().ToString());

    // RSEM likes to sort the transcript.bam file, which takes forever and isn't very useful, I've found. Just sort the genome.bam file instead
    // The guard uses "||" so that an empty file left by a failed run is regenerated.
    string sortedBamPath = WrapperUtility.ConvertWindowsPath(OutputPrefix + GenomeSortedBamSuffix);
    string samtoolsCommands = !doOuptutBam
        ? ""
        : "if [[ ! -f " + sortedBamPath + " || ! -s " + sortedBamPath + " ]]; then\n" +
          " " + SamtoolsWrapper.SortBam(OutputPrefix + GenomeBamSuffix, threads) + "\n" +
          " " + SamtoolsWrapper.IndexBamCommand(OutputPrefix + GenomeSortedBamSuffix) + "\n" +
          "fi";

    // construct the commands
    string isoformResultsPath = WrapperUtility.ConvertWindowsPath(OutputPrefix + IsoformResultsSuffix);
    scriptCommands.AddRange(new List<string>
    {
        "if [[ ! -f " + isoformResultsPath + " || ! -s " + isoformResultsPath + " ]]; then " +
            "./rsem-calculate-expression " +
            "--time " + // include timed results
            "--calc-ci " + // posterior calculation of 95% confidence intervals
            alignerOption + " " +
            threadOption + " " +
            strandOption + " " +
            bamOption + " " +
            inputOption + " " +
            WrapperUtility.ConvertWindowsPath(referencePrefix) + " " +
            WrapperUtility.ConvertWindowsPath(OutputPrefix) +
            "; fi",
        samtoolsCommands
    });
    return scriptCommands;
}