예제 #1
0
        /// <summary>
        /// Builds the list of shell commands that run a STAR alignment of the given reads.
        /// The STAR call is wrapped in a guard so it only runs when the expected output file is missing or empty.
        /// Note: fastqs must have \n line endings, not \r\n.
        /// </summary>
        /// <param name="spritzDirectory">Directory handed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="threads">Value passed to STAR as --runThreadN.</param>
        /// <param name="genomeDir">Genome directory; path-converted, quoted, and passed as --genomeDir.</param>
        /// <param name="fastqFiles">Read files passed as --readFilesIn (each path-converted and quoted). If any file ends in .gz or .bz2, a matching --readFilesCommand is appended.</param>
        /// <param name="outprefix">Prefix passed as --outFileNamePrefix; also used to locate the output file the guard checks.</param>
        /// <param name="strandSpecific">Not used by this method.</param>
        /// <param name="genomeLoad">Value passed as --genomeLoad. With LoadAndRemove, a trailing "STAR --genomeLoad Remove" command is emitted if the BAM output already exists when this method is called.</param>
        /// <param name="outSamType">Value passed as --outSAMtype; also selects which output file suffix the guard checks.</param>
        /// <returns>The shell command strings, in execution order. The last entry may be an empty string.</returns>
        public static List <string> BasicAlignReadCommands(string spritzDirectory, int threads, string genomeDir, string[] fastqFiles, string outprefix, bool strandSpecific = true, STARGenomeLoadOption genomeLoad = STARGenomeLoadOption.NoSharedMemory, string outSamType = "BAM Unsorted")
        {
            string reads_in = "\"" + string.Join("\" \"", fastqFiles.Select(f => WrapperUtility.ConvertWindowsPath(f))) + "\"";

            // Compressed input needs a command that writes the DEcompressed stream to stdout.
            // "bzip2 -c" would compress instead, so bunzip2 is used for .bz2 files.
            string read_command = "";
            if (fastqFiles.Any(f => Path.GetExtension(f) == ".gz"))
            {
                read_command = " --readFilesCommand zcat -c";
            }
            else if (fastqFiles.Any(f => Path.GetExtension(f) == ".bz2"))
            {
                read_command = " --readFilesCommand bunzip2 -c";
            }

            // Process is IDisposable; release the handle once the value has been read.
            string bamSortRamLimit;
            using (Process currentProcess = Process.GetCurrentProcess())
            {
                bamSortRamLimit = currentProcess.VirtualMemorySize64.ToString();
            }

            string arguments =
                " --genomeLoad " + genomeLoad.ToString() +
                " --runThreadN " + threads.ToString() +
                " --genomeDir \"" + WrapperUtility.ConvertWindowsPath(genomeDir) + "\"" +
                " --readFilesIn " + reads_in +
                " --outSAMtype " + outSamType +
                " --limitBAMsortRAM " + bamSortRamLimit +
                " --outSAMstrandField intronMotif" +            // adds XS tag to all alignments that contain a splice junction
                " --outFilterIntronMotifs RemoveNoncanonical" + // for cufflinks
                " --outFileNamePrefix " + WrapperUtility.ConvertWindowsPath(outprefix) +
                read_command;

            // Pick the output file whose presence means the alignment has already been done.
            // "Unsorted" does not match the case-sensitive "Sorted" test, so the order of checks is safe.
            string outputSuffix = outSamType.Contains("Sorted") ? SortedBamFileSuffix :
                                  outSamType.Contains("Unsorted") ? BamFileSuffix :
                                  SpliceJunctionFileSuffix;

            // Converted exactly once here and reused in the guard below.
            string fileToCheck = WrapperUtility.ConvertWindowsPath(outprefix + outputSuffix);

            // Evaluated now, while the command list is being built, not when the commands run.
            bool removeGenome = File.Exists(outprefix + BamFileSuffix) && genomeLoad == STARGenomeLoadOption.LoadAndRemove;

            return(new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "if [[ ( ! -f " + fileToCheck + " || ! -s " + fileToCheck + " ) ]]; then STAR" + arguments + "; fi",
                removeGenome ? "STAR --genomeLoad " + STARGenomeLoadOption.Remove.ToString() : ""
            });
        }
예제 #2
0
        /// <summary>
        /// Builds the list of shell commands that align reads with STAR into a coordinate-sorted BAM, index it,
        /// run the command produced by StarDedupCommand on it, and index the deduped BAM.
        /// Removing duplicate reads is a step that's recommended for variant calling.
        /// The chimeric junction settings are currently commented out, so no chimeric options are passed to STAR.
        /// Note: fastqs must have \n line endings, not \r\n.
        /// </summary>
        /// <param name="spritzDirectory">Directory handed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="threads">Value passed to STAR as --runThreadN and forwarded to StarDedupCommand.</param>
        /// <param name="genomeDir">Genome directory; path-converted and passed as --genomeDir.</param>
        /// <param name="fastqFiles">Read files passed as --readFilesIn (each path-converted). If any file ends in .gz or .bz2, a matching --readFilesCommand is appended.</param>
        /// <param name="outprefix">Prefix passed as --outFileNamePrefix; also used to build the sorted and deduped BAM paths.</param>
        /// <param name="overwriteStarAlignment">When true, the alignment and dedup commands are emitted unconditionally; when false, each is wrapped in a guard that skips it if its output file already exists and is non-empty.</param>
        /// <param name="strandSpecific">Not used by this method.</param>
        /// <param name="genomeLoad">Value passed as --genomeLoad. With LoadAndRemove, a trailing "STAR --genomeLoad Remove" command is emitted if the checked output files already exist when this method is called.</param>
        /// <returns>The shell command strings, in execution order. Entries may be empty strings where a guard or the final command is not needed.</returns>
        public static List <string> AlignRNASeqReadsForVariantCalling(string spritzDirectory, int threads, string genomeDir, string[] fastqFiles,
                                                                      string outprefix, bool overwriteStarAlignment, bool strandSpecific = true, STARGenomeLoadOption genomeLoad = STARGenomeLoadOption.NoSharedMemory)
        {
            string reads_in = string.Join(" ", fastqFiles.Select(f => WrapperUtility.ConvertWindowsPath(f)));

            // Compressed input needs a command that writes the DEcompressed stream to stdout.
            // "bzip2 -c" would compress instead, so bunzip2 is used for .bz2 files.
            string read_command = "";
            if (fastqFiles.Any(f => Path.GetExtension(f) == ".gz"))
            {
                read_command = " --readFilesCommand zcat -c";
            }
            else if (fastqFiles.Any(f => Path.GetExtension(f) == ".bz2"))
            {
                read_command = " --readFilesCommand bunzip2 -c";
            }

            // Process is IDisposable; release the handle once the value has been read.
            string bamSortRamLimit;
            using (Process currentProcess = Process.GetCurrentProcess())
            {
                bamSortRamLimit = currentProcess.VirtualMemorySize64.ToString();
            }

            string alignmentArguments =
                " --genomeLoad " + genomeLoad.ToString() +
                " --runMode alignReads" +
                " --runThreadN " + threads.ToString() +
                " --genomeDir " + WrapperUtility.ConvertWindowsPath(genomeDir) +
                " --readFilesIn " + reads_in +
                " --outSAMtype BAM SortedByCoordinate" +
                " --outBAMcompression 10" +
                " --limitBAMsortRAM " + bamSortRamLimit +
                " --outFileNamePrefix " + WrapperUtility.ConvertWindowsPath(outprefix) +

                // chimeric junction settings (disabled)
                //" --chimSegmentMin 12" +
                //" --chimJunctionOverhangMin 12" +
                //" --alignSJDBoverhangMin 10" +
                //" --alignMatesGapMax 100000" +
                //" --alignIntronMax 100000" +
                //" --chimSegmentReadGapMax 3" +
                //" --alignSJstitchMismatchNmax 5 -1 5 5" +

                // stringtie parameters
                " --outSAMstrandField intronMotif" +            // adds XS tag to all alignments that contain a splice junction
                " --outFilterIntronMotifs RemoveNoncanonical" + // for cufflinks

                // gatk parameters
                " --outSAMattrRGline ID:1 PU:platform  PL:illumina SM:sample LB:library" + // this could shorten the time for samples that aren't multiplexed in preprocessing for GATK
                " --outSAMmapqUnique 60" +                                                 // this is used to ensure compatibility with GATK without having to use the GATK hacks
                read_command;                                                              // note in the future, two sets of reads can be comma separated here, and the RGline can also be comma separated to distinguish them later

            // Shell-side paths of the two BAM files this pipeline produces.
            string sortedBamPath  = WrapperUtility.ConvertWindowsPath(outprefix + SortedBamFileSuffix);
            string dedupedBamPath = WrapperUtility.ConvertWindowsPath(outprefix + DedupedBamFileSuffix);

            // Evaluated now, while the command list is being built, not when the commands run.
            // NOTE(review): this method requests coordinate-sorted output, yet the check tests
            // outprefix + BamFileSuffix rather than SortedBamFileSuffix -- confirm that is intended.
            bool removeGenome = File.Exists(outprefix + BamFileSuffix) &&
                                File.Exists(outprefix + DedupedBamFileSuffix) &&
                                genomeLoad == STARGenomeLoadOption.LoadAndRemove;

            return(new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),

                // alignment, skipped when the sorted BAM already exists unless overwriting
                overwriteStarAlignment ? "" : "if [[ ( ! -f " + sortedBamPath + " || ! -s " + sortedBamPath + " ) ]]; then",
                "  STAR" + alignmentArguments,
                overwriteStarAlignment ? "" : "fi",
                SamtoolsWrapper.IndexBamCommand(sortedBamPath),

                // dedup, skipped when the deduped BAM already exists unless overwriting
                overwriteStarAlignment ? "" : "if [[ ( ! -f " + dedupedBamPath + " || ! -s " + dedupedBamPath + " ) ]]; then",
                "  " + StarDedupCommand(threads, outprefix + SortedBamFileSuffix, outprefix + Path.GetFileNameWithoutExtension(SortedBamFileSuffix)),
                overwriteStarAlignment ? "" : "fi",
                SamtoolsWrapper.IndexBamCommand(dedupedBamPath),

                removeGenome ? "STAR --genomeLoad " + STARGenomeLoadOption.Remove.ToString() : "",
            });
        }