/// <summary>
/// Builds the bash commands that align reads with STAR. The STAR call is wrapped in a guard so it only
/// runs when the expected output file is missing or empty.
/// Note: fastqs must have \n line endings, not \r\n.
/// </summary>
/// <param name="spritzDirectory">Directory handed to <c>WrapperUtility.ChangeToToolsDirectoryCommand</c>.</param>
/// <param name="threads">Value passed to STAR's <c>--runThreadN</c>.</param>
/// <param name="genomeDir">Genome directory passed to STAR's <c>--genomeDir</c>.</param>
/// <param name="fastqFiles">Read files passed to <c>--readFilesIn</c>; a .gz or .bz2 extension adds a matching <c>--readFilesCommand</c>.</param>
/// <param name="outprefix">Prefix passed to <c>--outFileNamePrefix</c> and used to locate the output file that is checked.</param>
/// <param name="strandSpecific">Not used by this method.</param>
/// <param name="genomeLoad">Value passed to STAR's <c>--genomeLoad</c>.</param>
/// <param name="outSamType">Value passed to STAR's <c>--outSAMtype</c>; also selects which output file the guard checks.</param>
/// <returns>The commands, in execution order. The last entry is an empty string when no genome unload is needed.</returns>
public static List<string> BasicAlignReadCommands(string spritzDirectory, int threads, string genomeDir, string[] fastqFiles, string outprefix,
    bool strandSpecific = true, STARGenomeLoadOption genomeLoad = STARGenomeLoadOption.NoSharedMemory, string outSamType = "BAM Unsorted")
{
    // Each read file is converted and individually double-quoted so that paths containing spaces survive the shell.
    string reads_in = "\"" + string.Join("\" \"", fastqFiles.Select(f => WrapperUtility.ConvertWindowsPath(f))) + "\"";

    // STAR needs a command that writes uncompressed FASTQ to stdout.
    // "bzip2 -c" would compress, so decompression is requested explicitly with -d.
    string read_command = "";
    if (fastqFiles.Any(f => string.Equals(Path.GetExtension(f), ".gz", System.StringComparison.OrdinalIgnoreCase)))
    {
        read_command = " --readFilesCommand zcat -c";
    }
    else if (fastqFiles.Any(f => string.Equals(Path.GetExtension(f), ".bz2", System.StringComparison.OrdinalIgnoreCase)))
    {
        read_command = " --readFilesCommand bzip2 -dc";
    }

    // The virtual memory size of this process is used as the BAM sorting RAM limit.
    string bamSortRam;
    using (Process currentProcess = Process.GetCurrentProcess())
    {
        bamSortRam = currentProcess.VirtualMemorySize64.ToString();
    }

    string arguments =
        " --genomeLoad " + genomeLoad.ToString() +
        " --runThreadN " + threads.ToString() +
        " --genomeDir \"" + WrapperUtility.ConvertWindowsPath(genomeDir) + "\"" +
        " --readFilesIn " + reads_in +
        " --outSAMtype " + outSamType +
        " --limitBAMsortRAM " + bamSortRam +
        " --outSAMstrandField intronMotif" + // adds XS tag to all alignments that contain a splice junction
        " --outFilterIntronMotifs RemoveNoncanonical" + // for cufflinks
        " --outFileNamePrefix " + WrapperUtility.ConvertWindowsPath(outprefix) +
        read_command;

    // Pick the output file whose presence means the alignment has already been done.
    // "Unsorted" does not contain "Sorted" (case-sensitive), so the order of these checks is safe.
    string outputSuffix = outSamType.Contains("Sorted") ? SortedBamFileSuffix :
        outSamType.Contains("Unsorted") ? BamFileSuffix :
        SpliceJunctionFileSuffix;

    // Converted exactly once; the result is reused for both shell tests below.
    string fileToCheck = WrapperUtility.ConvertWindowsPath(outprefix + outputSuffix);

    // NOTE(review): this File.Exists check runs when the commands are built, not when the script runs,
    // and it always looks at BamFileSuffix regardless of outSamType -- confirm that is intended.
    string removeGenomeCommand = File.Exists(outprefix + BamFileSuffix) && genomeLoad == STARGenomeLoadOption.LoadAndRemove ?
        "STAR --genomeLoad " + STARGenomeLoadOption.Remove.ToString() :
        "";

    return new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
        "if [[ ( ! -f " + fileToCheck + " || ! -s " + fileToCheck + " ) ]]; then STAR" + arguments + "; fi",
        removeGenomeCommand
    };
}
/// <summary>
/// Builds the bash commands that align reads with STAR into a coordinate-sorted BAM, index it,
/// remove duplicate reads (dedup), and index the deduped BAM. Deduplication is a step that's recommended for variant calling.
/// Note: fastqs must have \n line endings, not \r\n.
/// </summary>
/// <param name="spritzDirectory">Directory handed to <c>WrapperUtility.ChangeToToolsDirectoryCommand</c>.</param>
/// <param name="threads">Value passed to STAR's <c>--runThreadN</c> and to the dedup command.</param>
/// <param name="genomeDir">Genome directory passed to STAR's <c>--genomeDir</c>.</param>
/// <param name="fastqFiles">Read files passed to <c>--readFilesIn</c>; a .gz or .bz2 extension adds a matching <c>--readFilesCommand</c>.</param>
/// <param name="outprefix">Prefix passed to <c>--outFileNamePrefix</c> and used to locate the sorted and deduped BAM files.</param>
/// <param name="overwriteStarAlignment">When true, the "output already exists" guards are omitted, so alignment and dedup always run.</param>
/// <param name="strandSpecific">Not used by this method.</param>
/// <param name="genomeLoad">Value passed to STAR's <c>--genomeLoad</c>.</param>
/// <returns>The commands, in execution order. Entries that do not apply are empty strings.</returns>
public static List<string> AlignRNASeqReadsForVariantCalling(string spritzDirectory, int threads, string genomeDir, string[] fastqFiles,
    string outprefix, bool overwriteStarAlignment, bool strandSpecific = true, STARGenomeLoadOption genomeLoad = STARGenomeLoadOption.NoSharedMemory)
{
    // Each read file is converted and individually double-quoted (as BasicAlignReadCommands does)
    // so that paths containing spaces survive the shell.
    string reads_in = "\"" + string.Join("\" \"", fastqFiles.Select(f => WrapperUtility.ConvertWindowsPath(f))) + "\"";

    // STAR needs a command that writes uncompressed FASTQ to stdout.
    // "bzip2 -c" would compress, so decompression is requested explicitly with -d.
    string read_command = "";
    if (fastqFiles.Any(f => string.Equals(Path.GetExtension(f), ".gz", System.StringComparison.OrdinalIgnoreCase)))
    {
        read_command = " --readFilesCommand zcat -c";
    }
    else if (fastqFiles.Any(f => string.Equals(Path.GetExtension(f), ".bz2", System.StringComparison.OrdinalIgnoreCase)))
    {
        read_command = " --readFilesCommand bzip2 -dc";
    }

    // The virtual memory size of this process is used as the BAM sorting RAM limit.
    string bamSortRam;
    using (Process currentProcess = Process.GetCurrentProcess())
    {
        bamSortRam = currentProcess.VirtualMemorySize64.ToString();
    }

    string alignmentArguments =
        " --genomeLoad " + genomeLoad.ToString() +
        " --runMode alignReads" +
        " --runThreadN " + threads.ToString() +
        " --genomeDir \"" + WrapperUtility.ConvertWindowsPath(genomeDir) + "\"" +
        " --readFilesIn " + reads_in +
        " --outSAMtype BAM SortedByCoordinate" +
        " --outBAMcompression 10" +
        " --limitBAMsortRAM " + bamSortRam +
        " --outFileNamePrefix " + WrapperUtility.ConvertWindowsPath(outprefix) +

        // chimeric junction settings
        //" --chimSegmentMin 12" +
        //" --chimJunctionOverhangMin 12" +
        //" --alignSJDBoverhangMin 10" +
        //" --alignMatesGapMax 100000" +
        //" --alignIntronMax 100000" +
        //" --chimSegmentReadGapMax 3" +
        //" --alignSJstitchMismatchNmax 5 -1 5 5" +

        // stringtie parameters
        " --outSAMstrandField intronMotif" + // adds XS tag to all alignments that contain a splice junction
        " --outFilterIntronMotifs RemoveNoncanonical" + // for cufflinks

        // gatk parameters
        " --outSAMattrRGline ID:1 PU:platform PL:illumina SM:sample LB:library" + // this could shorten the time for samples that aren't multiplexed in preprocessing for GATK
        " --outSAMmapqUnique 60" + // this is used to ensure compatibility with GATK without having to use the GATK hacks
        read_command; // note in the future, two sets of reads can be comma separated here, and the RGline can also be comma separated to distinguish them later

    string sortedBam = WrapperUtility.ConvertWindowsPath(outprefix + SortedBamFileSuffix);
    string dedupedBam = WrapperUtility.ConvertWindowsPath(outprefix + DedupedBamFileSuffix);

    // When not overwriting, each step is guarded so it only runs if its output is missing or empty.
    string alignGuardOpen = overwriteStarAlignment ? "" : "if [[ ( ! -f " + sortedBam + " || ! -s " + sortedBam + " ) ]]; then";
    string dedupGuardOpen = overwriteStarAlignment ? "" : "if [[ ( ! -f " + dedupedBam + " || ! -s " + dedupedBam + " ) ]]; then";
    string guardClose = overwriteStarAlignment ? "" : "fi";

    // NOTE(review): these File.Exists checks run when the commands are built, not when the script runs.
    // The first one looks at BamFileSuffix although STAR is asked for coordinate-sorted output here --
    // confirm whether SortedBamFileSuffix was intended.
    string removeGenomeCommand =
        File.Exists(outprefix + BamFileSuffix) && File.Exists(outprefix + DedupedBamFileSuffix) && genomeLoad == STARGenomeLoadOption.LoadAndRemove ?
            "STAR --genomeLoad " + STARGenomeLoadOption.Remove.ToString() :
            "";

    return new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),

        alignGuardOpen,
        "  STAR" + alignmentArguments,
        guardClose,
        SamtoolsWrapper.IndexBamCommand(sortedBam),

        dedupGuardOpen,
        "  " + StarDedupCommand(threads, outprefix + SortedBamFileSuffix, outprefix + Path.GetFileNameWithoutExtension(SortedBamFileSuffix)),
        guardClose,
        SamtoolsWrapper.IndexBamCommand(dedupedBam),

        removeGenomeCommand,
    };
}