Ejemplo n.º 1
0
        /// <summary>
        /// Builds the shell commands that run STAR-Fusion directly on fastq reads.
        /// As a side effect, sets <c>OutputDirectoryPath</c> to a "&lt;first fastq name&gt;FusionAnalysis" folder
        /// beside the first fastq and creates it, and creates a "_STARFusionTmp" folder in the analysis directory.
        /// </summary>
        /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="analysisDirectory">Directory in which the "_STARFusionTmp" scratch folder is created.</param>
        /// <param name="threads">Value given to the --CPU option.</param>
        /// <param name="fastqs">Fastq paths; the first is used for --left_fq and the second, when present, for --right_fq.</param>
        /// <returns>Two commands: the change-directory command and the STAR-Fusion invocation.</returns>
        /// <exception cref="FileNotFoundException">ReferenceLibraryDirectory is null.</exception>
        /// <exception cref="ArgumentException">fastqs is null or empty.</exception>
        public List <string> RunStarFusion(string spritzDirectory, string analysisDirectory, int threads, string[] fastqs)
        {
            if (ReferenceLibraryDirectory == null)
            {
                throw new FileNotFoundException("STAR-Fusion reference library was not generated prior to running STAR-Fusion.");
            }
            if (fastqs == null || fastqs.Length == 0)
            {
                throw new ArgumentException("No fastqs were passed into STAR-Fusion.");
            }

            // results go next to the first fastq
            string firstFastq = fastqs[0];
            OutputDirectoryPath = Path.Combine(Path.GetDirectoryName(firstFastq), Path.GetFileNameWithoutExtension(firstFastq) + "FusionAnalysis");
            Directory.CreateDirectory(OutputDirectoryPath);

            string scratchDirectory = Path.Combine(analysisDirectory, "_STARFusionTmp");
            Directory.CreateDirectory(scratchDirectory);

            string leftReadsOption  = $" --left_fq {WrapperUtility.ConvertWindowsPath(firstFastq)}";
            string rightReadsOption = "";
            if (fastqs.Length > 1)
            {
                rightReadsOption = $" --right_fq {WrapperUtility.ConvertWindowsPath(fastqs[1])}";
            }

            string options =
                " --examine_coding_effect" +
                leftReadsOption +
                rightReadsOption +
                $" --CPU {threads}" +
                $" --output_dir {WrapperUtility.ConvertWindowsPath(OutputDirectoryPath)}" +
                $" --genome_lib_dir {WrapperUtility.ConvertWindowsPath(ReferenceLibraryDirectory)}" +
                $" --tmpdir {WrapperUtility.ConvertWindowsPath(scratchDirectory)}";

            var commands = new List<string>();
            commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));
            commands.Add(StarFusionDirectoryName + "/STAR-Fusion " + options);
            return commands;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the shell commands for a basic STAR alignment.
        /// STAR is only run when the expected output file is missing or empty.
        /// Note: fastqs must have \n line endings, not \r\n.
        /// </summary>
        /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="threads">Value given to --runThreadN.</param>
        /// <param name="genomeDir">Value given to --genomeDir.</param>
        /// <param name="fastqFiles">Read files given to --readFilesIn; a .gz or .bz2 extension adds a --readFilesCommand option.</param>
        /// <param name="outprefix">Value given to --outFileNamePrefix; also the prefix of the output file that is checked.</param>
        /// <param name="strandSpecific">Not used in this method body.</param>
        /// <param name="genomeLoad">Value given to --genomeLoad.</param>
        /// <param name="outSamType">Value given to --outSAMtype; also selects which output file suffix is checked.</param>
        /// <returns>Three entries: change-directory command, guarded STAR command, and a genome-removal command or an empty string.</returns>
        public static List <string> BasicAlignReadCommands(string spritzDirectory, int threads, string genomeDir, string[] fastqFiles, string outprefix, bool strandSpecific = true, STARGenomeLoadOption genomeLoad = STARGenomeLoadOption.NoSharedMemory, string outSamType = "BAM Unsorted")
        {
            // every converted fastq path is wrapped in double quotes and separated by a space
            // NOTE(review): if ConvertWindowsPath already quotes its result, these extra quotes are redundant -- confirm
            IEnumerable<string> convertedFastqs = fastqFiles.Select(f => WrapperUtility.ConvertWindowsPath(f));
            string quotedReads = "\"" + string.Join("\" \"", convertedFastqs) + "\"";

            // gzip takes precedence over bzip2 when both kinds of file are present
            string decompressOption = "";
            if (fastqFiles.Any(f => Path.GetExtension(f) == ".gz"))
            {
                decompressOption = " --readFilesCommand zcat -c";
            }
            else if (fastqFiles.Any(f => Path.GetExtension(f) == ".bz2"))
            {
                decompressOption = " --readFilesCommand bzip2 -c";
            }

            string starOptions =
                $" --genomeLoad {genomeLoad}" +
                $" --runThreadN {threads}" +
                $" --genomeDir \"{WrapperUtility.ConvertWindowsPath(genomeDir)}\"" +
                $" --readFilesIn {quotedReads}" +
                $" --outSAMtype {outSamType}" +
                $" --limitBAMsortRAM {Process.GetCurrentProcess().VirtualMemorySize64}" +
                " --outSAMstrandField intronMotif" +            // adds XS tag to all alignments that contain a splice junction
                " --outFilterIntronMotifs RemoveNoncanonical" + // for cufflinks
                $" --outFileNamePrefix {WrapperUtility.ConvertWindowsPath(outprefix)}" +
                decompressOption;

            // choose the output file whose presence means the alignment was already done
            string expectedSuffix;
            if (outSamType.Contains("Sorted"))
            {
                expectedSuffix = SortedBamFileSuffix;
            }
            else if (outSamType.Contains("Unsorted"))
            {
                expectedSuffix = BamFileSuffix;
            }
            else
            {
                expectedSuffix = SpliceJunctionFileSuffix;
            }

            // NOTE(review): the path is passed through ConvertWindowsPath twice, as in the original -- confirm that is intended
            string fileToCheck = WrapperUtility.ConvertWindowsPath(outprefix + expectedSuffix);
            string guardedPath = WrapperUtility.ConvertWindowsPath(fileToCheck);

            var commands = new List<string>();
            commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));
            commands.Add("if [[ ( ! -f " + guardedPath + " || ! -s " + guardedPath + " ) ]]; then STAR" + starOptions + "; fi");

            // this existence check happens while the commands are being built, not when the script runs
            bool removeGenome = File.Exists(outprefix + BamFileSuffix) && genomeLoad == STARGenomeLoadOption.LoadAndRemove;
            commands.Add(removeGenome ? "STAR --genomeLoad " + STARGenomeLoadOption.Remove.ToString() : "");
            return commands;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the shell commands that annotate a VCF with SnpEff.
        /// Sets the output path properties (annotated VCF, HTML report, genes summary, protein fasta and protein XML)
        /// from the input VCF path and makes sure the SnpEff "data" directory exists.
        /// </summary>
        /// <param name="spritzDirectory">Root under which "Tools/SnpEff/data" is looked up; also used for the change-directory command.</param>
        /// <param name="reference">Case-insensitive prefix used to pick a database folder from the SnpEff data directory.</param>
        /// <param name="inputVcfPath">VCF to annotate; the output paths are derived from it.</param>
        /// <param name="fromReference">When true, the input VCF and the redirect to the annotated VCF are left off the command.</param>
        /// <returns>An empty list when the annotated VCF already exists and is non-empty; otherwise the commands to run.</returns>
        public List <string> PrimaryVariantAnnotation(string spritzDirectory, string reference, string inputVcfPath, bool fromReference = false)
        {
            string inputDirectory = Path.GetDirectoryName(inputVcfPath);
            string annotatedStem  = Path.Combine(inputDirectory, Path.GetFileNameWithoutExtension(inputVcfPath)) + ".snpEffAnnotated";

            AnnotatedVcfPath          = annotatedStem + ".vcf";
            HtmlReportPath            = annotatedStem + ".html";
            AnnotatedGenesSummaryPath = annotatedStem + ".genes.txt";
            VariantProteinFastaPath   = annotatedStem + ".protein.fasta";
            VariantProteinXmlPath     = annotatedStem + ".protein.xml";

            string snpEffDataDirectory = Path.Combine(spritzDirectory, "Tools", "SnpEff", "data");
            Directory.CreateDirectory(snpEffDataDirectory);
            string[] existingDatabases = Directory.GetDirectories(snpEffDataDirectory);

            // nothing to do when a non-empty annotated VCF is already present
            bool alreadyAnnotated = File.Exists(AnnotatedVcfPath) && new FileInfo(AnnotatedVcfPath).Length > 0;
            if (alreadyAnnotated)
            {
                return new List<string>();
            }

            // first database folder whose name starts with the reference name; null (rendered as empty) when none matches
            string databasePath = existingDatabases
                                  .Where(x => Path.GetFileName(x).StartsWith(reference, true, null))
                                  .FirstOrDefault();
            string databaseName = Path.GetFileName(databasePath);

            string inputAndOutput = "";
            if (!fromReference)
            {
                inputAndOutput = $" {WrapperUtility.ConvertWindowsPath(inputVcfPath)} > {WrapperUtility.ConvertWindowsPath(AnnotatedVcfPath)}";
            }

            string snpEffCommand =
                $"{SnpEff(Workers)} -v -stats {WrapperUtility.ConvertWindowsPath(HtmlReportPath)}" +
                $" -fastaProt {WrapperUtility.ConvertWindowsPath(VariantProteinFastaPath)}" +
                $" -xmlProt {WrapperUtility.ConvertWindowsPath(VariantProteinXmlPath)}" +
                $" {databaseName}" +
                inputAndOutput;

            string annotatedVcf = WrapperUtility.ConvertWindowsPath(AnnotatedVcfPath);

            var commands = new List<string>();
            commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));
            commands.Add(snpEffCommand);

            // ensure that the files get closed before continuing
            commands.Add(WrapperUtility.EnsureClosedFileCommands(annotatedVcf));
            commands.Add(WrapperUtility.EnsureClosedFileCommands(WrapperUtility.ConvertWindowsPath(VariantProteinFastaPath)));
            commands.Add(WrapperUtility.EnsureClosedFileCommands(WrapperUtility.ConvertWindowsPath(VariantProteinXmlPath)));

            // remove the annotated VCF file if snpEff didn't work, e.g. if there was no VCF file to annotate
            commands.Add("if [[ ( -f " + annotatedVcf + " && ! -s " + annotatedVcf + " ) ]]; then");
            commands.Add("  rm " + annotatedVcf);
            commands.Add("fi");
            return commands;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Scalpel installation folder cannot be moved and still work. There must be some hard-coded path references.
        /// This compares the location recorded at install time with the expected location and, when they differ,
        /// runs the remove script followed by the install script.
        /// </summary>
        /// <param name="spritzDirectory">Root directory containing the "Tools" folder.</param>
        /// <returns>
        /// true when the recorded location matches the expected one (no fix needed);
        /// false when the location file is missing or when a reinstall was performed.
        /// </returns>
        public bool CheckInstallation(string spritzDirectory)
        {
            // Don't go further if installation hasn't been run at all
            string locationRecordPath = Path.Combine(spritzDirectory, "Tools", ScalpelLocationCheckFilename);
            if (!File.Exists(locationRecordPath))
            {
                return false;
            }

            // the remove script is written on every call, before the comparison, as in the original order
            string removeScriptPath = WriteRemoveScript(spritzDirectory);
            string checkScriptPath  = WrapperUtility.GetInstallationScriptPath(spritzDirectory, "CheckScalpelInstallation.bash");

            string scalpelFolder    = Path.Combine(spritzDirectory, "Tools", "scalpel-" + ScalpelVersion);
            string expectedLocation = WrapperUtility.ConvertWindowsPath(scalpelFolder).Trim('"');
            string recordedLocation = File.ReadAllText(locationRecordPath).TrimEnd();

            if (string.Equals(recordedLocation, expectedLocation))
            {
                return true;
            }

            // Remove and reinstall because it moved
            var reinstallCommands = new List<string>();
            reinstallCommands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));
            reinstallCommands.Add("bash " + WrapperUtility.ConvertWindowsPath(removeScriptPath));
            reinstallCommands.Add("bash " + WrapperUtility.ConvertWindowsPath(WriteInstallScript(spritzDirectory)));
            WrapperUtility.GenerateAndRunScript(checkScriptPath, reinstallCommands).WaitForExit();
            return false;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the shell commands for transcript assembly with stringtie.
        /// The BAM header is checked for "SO:coordinate"; when that is not found the BAM is sorted first and the
        /// ".sorted.bam" file is used instead. stringtie is only run when the output GTF is missing or empty.
        /// </summary>
        /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="threads">Value given to stringtie -p and to the sort command.</param>
        /// <param name="bamPath">Alignment file to assemble; output paths are derived from it.</param>
        /// <param name="geneModelGtfOrGffPath">Gene model given to stringtie -G.</param>
        /// <param name="genome">Passed to BAMProperties when strandedness is inferred.</param>
        /// <param name="strandSpecific">Strandedness to use when it is not inferred.</param>
        /// <param name="inferStrandSpecificity">When true, strandedness is taken from a BAMProperties instance built from the BAM.</param>
        /// <param name="outputTranscriptGtfPath">Set to the BAM path with its extension replaced by ".gtf".</param>
        /// <returns>The commands to run, in order.</returns>
        public static List <string> AssembleTranscripts(string spritzDirectory, int threads, string bamPath, string geneModelGtfOrGffPath, Genome genome,
                                                        Strandedness strandSpecific, bool inferStrandSpecificity, out string outputTranscriptGtfPath)
        {
            Strandedness strandedness = strandSpecific;

            if (inferStrandSpecificity)
            {
                BAMProperties bamProperties = new BAMProperties(bamPath, geneModelGtfOrGffPath, genome, 0.8);
                strandedness = bamProperties.Strandedness;
            }

            // The leading space is required: without it the flag is concatenated onto the -o output path
            // and stringtie never sees the strand option.
            string strandOption = strandedness == Strandedness.None ? "" :
                                  strandedness == Strandedness.Forward ? " --fr" : " --rf";

            string sortedCheckPath = Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath) + ".cufflinksSortCheck");

            outputTranscriptGtfPath = Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath) + ".gtf");
            return(new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),

                // the check file is non-empty only when the header declares coordinate sorting
                "samtools view -H " + WrapperUtility.ConvertWindowsPath(bamPath) + " | grep SO:coordinate > " + WrapperUtility.ConvertWindowsPath(sortedCheckPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then " + SamtoolsWrapper.SortBam(bamPath, threads) + "; fi",

                // $bam points at the original BAM, or at the ".sorted.bam" file when sorting was needed
                // (presumably the file SamtoolsWrapper.SortBam writes -- confirm)
                "bam=" + WrapperUtility.ConvertWindowsPath(bamPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then bam=" + WrapperUtility.ConvertWindowsPath(Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath) + ".sorted.bam")) + "; fi",

                // skip the assembly when a non-empty GTF is already present
                "if [[ ! -f " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) + " || ! -s " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) + " ]]; then",
                "  echo \"Performing stringtie transcript reconstruction on " + bamPath + "\"",
                "  stringtie $bam " +
                " -p " + threads.ToString() +
                " -G " + WrapperUtility.ConvertWindowsPath(geneModelGtfOrGffPath) +
                " -o " + WrapperUtility.ConvertWindowsPath(outputTranscriptGtfPath) +
                strandOption,
                "fi",
            });
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds the shell commands that call indels with scalpel-discovery and then strip SNVs from the result.
        /// Sets <c>IndelVcfPath</c> to "variants.indel.vcf" in the output directory and
        /// <c>FilteredIndelVcfPath</c> to the path reported by the VcfToolsWrapper.
        /// </summary>
        /// <remarks>
        /// Need to filter VCF by FILTER = PASS; there are several reasons they don't accept calls that I trust.
        /// There's an attribute "ZYG" for zygosity, marking a call as heterozygous or homozygous.
        /// </remarks>
        /// <param name="spritzDirectory">Used for the installation check and the change-directory command.</param>
        /// <param name="threads">Value given to --numprocs.</param>
        /// <param name="genomeFastaP">Value given to --ref.</param>
        /// <param name="bedPath">Value given to --bed.</param>
        /// <param name="bamPath">Value given to --bam.</param>
        /// <param name="outdir">Value given to --dir; the indel VCF is expected inside it.</param>
        /// <returns>The commands to run, in order.</returns>
        public List <string> CallIndels(string spritzDirectory, int threads, string genomeFastaP, string bedPath, string bamPath, string outdir)
        {
            CheckInstallation(spritzDirectory);
            var vcftools = new VcfToolsWrapper();

            IndelVcfPath = Path.Combine(outdir, "variants.indel.vcf");
            string indelVcf = WrapperUtility.ConvertWindowsPath(IndelVcfPath);

            string scalpelCommand =
                $"  scalpel-{ScalpelVersion}/scalpel-discovery --single " +
                $"--bam {WrapperUtility.ConvertWindowsPath(bamPath)}" +
                $" --ref {WrapperUtility.ConvertWindowsPath(genomeFastaP)}" +
                $" --bed {WrapperUtility.ConvertWindowsPath(bedPath)}" +
                $" --numprocs {threads}" +
                $" --dir {WrapperUtility.ConvertWindowsPath(outdir)}";

            var commands = new List<string>();
            commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));

            // only call indels when the VCF is missing or empty
            commands.Add("if [[ ! -f " + indelVcf + " || ! -s " + indelVcf + " ]]; then ");
            commands.Add(scalpelCommand);

            // scalpel uses 0-indexing, where SnpEff uses 1-indexing; the awk step that shifted positions is currently disabled:
            //"  awk 'BEGIN{OFS=\"\t\"}{ if (substr($0, 1, 1) != \"#\") $2=++$2; print $0 }' " + WrapperUtility.ConvertWindowsPath(IndelVcfPath) + " > " + WrapperUtility.ConvertWindowsPath(IndelVcf1IndexedPath),
            commands.Add("fi");

            // vcf-concat doesn't keep all INFO header lines, so just dump the INFO from each variant
            commands.Add(vcftools.RemoveAllSnvs(spritzDirectory, IndelVcfPath, false, true));

            FilteredIndelVcfPath = vcftools.VcfWithoutSnvsPath;
            return commands;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Prepares an Ensembl genome fasta for alignment and all following analysis. The main issue is that Ensembl
        /// orders chromosomes lexicographically, not karyotypically, like some software like GATK expects.
        /// Sets <c>EnsemblGenome</c> and <c>ReorderedFastaPath</c>; writes a ".karyotypic.fa" file when the genome
        /// is not already in karyotypic order and that file does not exist yet.
        /// </summary>
        /// <param name="analysisDirectory">Used to locate the script that decompresses the fasta.</param>
        /// <param name="genomeFasta">Path to the genome fasta; a ".gz" or ".tgz" file is gunzipped first.</param>
        public void PrepareEnsemblGenomeFasta(string analysisDirectory, string genomeFasta)
        {
            string extension = Path.GetExtension(genomeFasta);
            bool isCompressed = extension == ".gz" || extension == ".tgz";
            if (isCompressed)
            {
                // NOTE(review): ".tgz" is handled exactly like ".gz" (gunzip, then drop the extension) -- confirm this is intended
                string gunzipScript = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "Gzippy.bash");
                var gunzipCommands = new List<string> { "gunzip " + WrapperUtility.ConvertWindowsPath(genomeFasta) };
                WrapperUtility.GenerateAndRunScript(gunzipScript, gunzipCommands).WaitForExit();
                genomeFasta = Path.ChangeExtension(genomeFasta, null);
            }

            // We need to use the same fasta file throughout and have all the VCF and GTF chromosome reference IDs be the same as these.
            // Right now this is based on ensembl references, so those are the chromosome IDs used throughout.
            // TODO: try this with UCSC references to judge whether there's a difference in quality / yield / FDR etc in subsequent proteomics analysis
            // This file needs to be in karyotypic order; this allows us not to have to reorder it for GATK analysis
            string fastaDirectory = Path.GetDirectoryName(genomeFasta);
            string fastaStem      = Path.GetFileNameWithoutExtension(genomeFasta);
            ReorderedFastaPath = Path.Combine(fastaDirectory, fastaStem + ".karyotypic.fa");
            EnsemblGenome      = new Genome(genomeFasta);

            // already in the right order: use the input fasta as-is
            if (EnsemblGenome.IsKaryotypic())
            {
                ReorderedFastaPath = genomeFasta;
                return;
            }

            EnsemblGenome.Chromosomes = EnsemblGenome.KaryotypicOrder();
            if (File.Exists(ReorderedFastaPath))
            {
                return;
            }
            Genome.WriteFasta(EnsemblGenome.Chromosomes.Select(x => x.Sequence), ReorderedFastaPath);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds the shell commands that create a base recalibration table and apply it to the reads.
        /// Sets <c>RecalibrationTablePath</c> (".recaltable") and <c>RecalibratedBamPath</c> (".recal.bam") next to the input BAM.
        /// Each GATK step is skipped when its output file already exists and is non-empty.
        /// </summary>
        /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="analysisDirectory">Not used in this method body.</param>
        /// <param name="genomeFasta">Reference given to -R; also indexed by the fasta and dictionary index commands.</param>
        /// <param name="bam">Input alignment given to -I; output paths are derived from it.</param>
        /// <param name="knownSitesVcf">
        /// VCF given to --known-sites. When null or empty, both the --known-sites option and the
        /// VCF index check are left out (an empty string keeps the place of the index command).
        /// </param>
        /// <returns>The commands to run, in order.</returns>
        public List <string> BaseRecalibration(string spritzDirectory, string analysisDirectory, string genomeFasta, string bam, string knownSitesVcf)
        {
            RecalibrationTablePath = Path.Combine(Path.GetDirectoryName(bam), Path.GetFileNameWithoutExtension(bam) + ".recaltable");
            RecalibratedBamPath    = Path.Combine(Path.GetDirectoryName(bam), Path.GetFileNameWithoutExtension(bam) + ".recal.bam");

            // One guard for everything that touches the known-sites VCF, so that an absent VCF does not
            // produce an "IndexFeatureFile -F" command with no file argument.
            bool hasKnownSites = !string.IsNullOrEmpty(knownSitesVcf);

            return(new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                SamtoolsWrapper.GenomeFastaIndexCommand(genomeFasta),
                GenomeDictionaryIndexCommand(genomeFasta),

                // check that reference VCF is indexed
                hasKnownSites ?
                "if [ ! -f " + WrapperUtility.ConvertWindowsPath(knownSitesVcf) + ".idx ]; then " + Gatk(Workers) + " IndexFeatureFile -F " + WrapperUtility.ConvertWindowsPath(knownSitesVcf) + "; fi" :
                "",

                // build the recalibration table unless a non-empty one is already there
                "if [[ ( ! -f " + WrapperUtility.ConvertWindowsPath(RecalibrationTablePath) + " || ! -s " + WrapperUtility.ConvertWindowsPath(RecalibrationTablePath) + " ) ]]; then " +
                Gatk(Workers) +
                " BaseRecalibrator" +
                " -R " + WrapperUtility.ConvertWindowsPath(genomeFasta) +
                " -I " + WrapperUtility.ConvertWindowsPath(bam) +
                (hasKnownSites ? " --known-sites " + WrapperUtility.ConvertWindowsPath(knownSitesVcf) : "") +
                " -O " + WrapperUtility.ConvertWindowsPath(RecalibrationTablePath) +
                "; fi",

                // apply the table unless a non-empty recalibrated BAM is already there
                "if [[ ( ! -f " + WrapperUtility.ConvertWindowsPath(RecalibratedBamPath) + " || ! -s " + WrapperUtility.ConvertWindowsPath(RecalibratedBamPath) + " ) ]]; then " +
                Gatk(Workers) +
                " ApplyBQSR" +
                " -R " + WrapperUtility.ConvertWindowsPath(genomeFasta) +
                " -I " + WrapperUtility.ConvertWindowsPath(bam) +
                " --bqsr-recal-file " + WrapperUtility.ConvertWindowsPath(RecalibrationTablePath) +
                " -O " + WrapperUtility.ConvertWindowsPath(RecalibratedBamPath) +
                "; fi",
                SamtoolsWrapper.IndexBamCommand(RecalibratedBamPath),
            });
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Writes and runs a script that aligns reads with hisat2, then waits for it to finish.
 /// Exactly one fastq path is aligned as unpaired reads (-U); otherwise the first two paths are used as mates (-1 / -2).
 /// </summary>
 /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
 /// <param name="analysisDirectory">Used to locate the "Hisat2Align.bash" script.</param>
 /// <param name="IndexPrefix">Index prefix given to -x.</param>
 /// <param name="fastqPaths">Read files to align.</param>
 /// <param name="outputDirectory">
 /// Despite the name, this is set to the SAM file name given to -S:
 /// "Hisat2OutUnpaired.sam" or "Hisat2OutPaired.sam".
 /// </param>
 public static void Align(string spritzDirectory, string analysisDirectory, string IndexPrefix, string[] fastqPaths, out string outputDirectory)
 {
     bool unpaired = fastqPaths.Length == 1;
     outputDirectory = unpaired ? "Hisat2OutUnpaired.sam" : "Hisat2OutPaired.sam";

     string scriptPath      = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "Hisat2Align.bash");
     string changeDirectory = WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory);
     string indexArgument   = WrapperUtility.ConvertWindowsPath(IndexPrefix);

     string readsArgument;
     if (unpaired)
     {
         readsArgument = " -U " + string.Join(",", fastqPaths.Select(x => WrapperUtility.ConvertWindowsPath(x)));
     }
     else
     {
         readsArgument = " -1 " + WrapperUtility.ConvertWindowsPath(fastqPaths[0]) +
                         " -2 " + WrapperUtility.ConvertWindowsPath(fastqPaths[1]);
     }

     var commands = new List<string>
     {
         changeDirectory,
         "hisat2-2.1.0/hisat2 -q -x" + " " + indexArgument + readsArgument + " -S " + outputDirectory,
     };
     WrapperUtility.GenerateAndRunScript(scriptPath, commands).WaitForExit();
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds the shell commands for transcript assembly with cufflinks. Note that fragment bias estimation
        /// (--frag-bias-correct) and multi-read rescuing (--multi-read-correct) are not used.
        /// These take a lot of time, and they only provide better abundance estimates, which we use RSEM for.
        /// The BAM header is checked for "SO:coordinate"; when that is not found the BAM is sorted first and the
        /// ".sorted.bam" file is used instead. cufflinks is only run when the transcripts file is missing or empty.
        /// </summary>
        /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="analysisDirectory">Passed to WrapperUtility.GetAnalysisScriptPath.</param>
        /// <param name="threads">Value given to --num-threads and to the sort command.</param>
        /// <param name="bamPath">Alignment file to assemble; output paths are derived from it.</param>
        /// <param name="geneModelGtfOrGffPath">Gene model given to --GTF-guide.</param>
        /// <param name="genome">Passed to BAMProperties when strandedness is inferred.</param>
        /// <param name="strandSpecific">Whether to treat the library as stranded when it is not inferred.</param>
        /// <param name="inferStrandSpecificity">When true, strandedness is taken from a BAMProperties instance built from the BAM.</param>
        /// <param name="outputDirectory">Set to the BAM path with its extension replaced by ".cufflinksOutput".</param>
        /// <returns>The commands to run, in order.</returns>
        public static List <string> AssembleTranscripts(string spritzDirectory, string analysisDirectory, int threads, string bamPath, string geneModelGtfOrGffPath, Genome genome, bool strandSpecific, bool inferStrandSpecificity, out string outputDirectory)
        {
            bool isStranded = strandSpecific;

            if (inferStrandSpecificity)
            {
                BAMProperties bamProperties = new BAMProperties(bamPath, geneModelGtfOrGffPath, genome, 0.8);
                isStranded = bamProperties.Strandedness != Strandedness.None;
            }

            string sortedCheckPath = Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath) + ".cufflinksSortCheck");

            outputDirectory = Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath) + ".cufflinksOutput");

            // NOTE(review): the result is never used; the call is kept in case GetAnalysisScriptPath has side effects -- confirm and remove if not
            string script_name = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "CufflinksRun.bash");

            // The leading space is required: without it the option is concatenated onto the --output-dir path
            // and cufflinks never sees the library type.
            string libraryTypeOption = isStranded ? " --library-type fr-firststrand" : "";

            return(new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),

                // the check file is non-empty only when the header declares coordinate sorting
                "samtools view -H " + WrapperUtility.ConvertWindowsPath(bamPath) + " | grep SO:coordinate > " + WrapperUtility.ConvertWindowsPath(sortedCheckPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then " + SamtoolsWrapper.SortBam(bamPath, threads) + "; fi",

                // $bam points at the original BAM, or at the ".sorted.bam" file when sorting was needed
                // (presumably the file SamtoolsWrapper.SortBam writes -- confirm)
                "bam=" + WrapperUtility.ConvertWindowsPath(bamPath),
                "if [ ! -s " + WrapperUtility.ConvertWindowsPath(sortedCheckPath) + " ]; then bam=" + WrapperUtility.ConvertWindowsPath(Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath) + ".sorted.bam")) + "; fi",

                // skip the assembly when a non-empty transcripts file is already present
                "if [[ ! -f " + WrapperUtility.ConvertWindowsPath(Path.Combine(outputDirectory, TranscriptsFilename)) + " || ! -s " + WrapperUtility.ConvertWindowsPath(Path.Combine(outputDirectory, TranscriptsFilename)) + " ]]; then " +
                "cufflinks-2.2.1/cufflinks " +
                " --num-threads " + threads.ToString() +
                " --GTF-guide " + WrapperUtility.ConvertWindowsPath(geneModelGtfOrGffPath) +
                " --output-dir " + WrapperUtility.ConvertWindowsPath(outputDirectory) +
                libraryTypeOption +
                " $bam" +
                "; fi",
            });
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Gets commands to prepare an RSEM reference.
        /// Sets <c>ReferenceIndexPrefix</c> to the fasta file name (without extension) inside a reference directory
        /// whose name is derived from the fasta path, the aligner, and the gene model path.
        /// </summary>
        /// <param name="spritzDirectory">Used for the aligner option and the change-directory command.</param>
        /// <param name="referenceFastaPath">Genome fasta given to rsem-prepare-reference.</param>
        /// <param name="threads">Value given to --num-threads.</param>
        /// <param name="geneModelPath">
        /// Gene model; an extension starting with ".gff" is passed with --gff3 and ".gtf" with --gtf.
        /// Any other extension results in no gene model option. The path must have an extension.
        /// </param>
        /// <param name="aligner">Selects the aligner option and the "RsemStarReference" / "RsemBowtieReference" directory suffix.</param>
        /// <returns>The commands to run, in order.</returns>
        public List <string> PrepareReferenceCommands(string spritzDirectory, string referenceFastaPath, int threads, string geneModelPath, RSEMAlignerOption aligner)
        {
            // make option strings, including putting reference files into a new directory
            string alignerOption            = GetAlignerOption(spritzDirectory, aligner);
            string threadOption             = "--num-threads " + threads.ToString();

            // NOTE(review): string.GetHashCode() is not guaranteed to be stable across processes or runtime versions,
            // so this directory name may differ between runs and defeat the reuse check below -- confirm target runtime
            string referencePrefixDirectory = Path.Combine(Path.GetDirectoryName(referenceFastaPath), Path.GetFileNameWithoutExtension(referenceFastaPath)) +
                                              (aligner == RSEMAlignerOption.STAR ? "RsemStarReference" : "RsemBowtieReference") +
                                              "_" + Path.GetExtension(geneModelPath).Substring(1).ToUpperInvariant() + geneModelPath.GetHashCode().ToString();

            ReferenceIndexPrefix = Path.Combine(referencePrefixDirectory, Path.GetFileNameWithoutExtension(referenceFastaPath));
            string geneModelOption = Path.GetExtension(geneModelPath).StartsWith(".gff") ? "--gff3 " + WrapperUtility.ConvertWindowsPath(geneModelPath) :
                                     Path.GetExtension(geneModelPath) == ".gtf" ? "--gtf " + WrapperUtility.ConvertWindowsPath(geneModelPath) :
                                     null;

            string suffixArrayPath = WrapperUtility.ConvertWindowsPath(Path.Combine(referencePrefixDirectory, "SA"));

            // construct the commands
            var scriptStrings = new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "cd RSEM-1.3.0",

                // -p keeps the command from failing when the directory is left over from an earlier run
                "mkdir -p " + WrapperUtility.ConvertWindowsPath(referencePrefixDirectory),

                // rebuild when the "SA" file is missing OR empty; with "&&" an empty file left by a
                // failed run was treated as a finished reference
                "if [[ ! -f " + suffixArrayPath + " || ! -s " + suffixArrayPath + " ]]; then " +
                "./rsem-prepare-reference " +
                geneModelOption + " " +
                alignerOption + " " +
                threadOption + " " +
                WrapperUtility.ConvertWindowsPath(referenceFastaPath) + " " +
                WrapperUtility.ConvertWindowsPath(ReferenceIndexPrefix) +
                "; fi"
            };

            return(scriptStrings);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds the shell commands that run STAR-Fusion on an existing chimeric junction file.
        /// As a side effect, sets <c>OutputDirectoryPath</c> to a "&lt;junction file name&gt;FusionAnalysis" folder
        /// beside the junction file and creates it, and creates a "_STARFusionTmp" folder in the analysis directory.
        /// </summary>
        /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="analysisDirectory">Directory in which the "_STARFusionTmp" scratch folder is created.</param>
        /// <param name="threads">Value given to the --CPU option.</param>
        /// <param name="chimericOutJunction">Junction file given to --chimeric_junction.</param>
        /// <returns>Two commands: the change-directory command and the STAR-Fusion invocation.</returns>
        /// <exception cref="FileNotFoundException">ReferenceLibraryDirectory is null.</exception>
        public List <string> RunStarFusion(string spritzDirectory, string analysisDirectory, int threads, string chimericOutJunction)
        {
            if (ReferenceLibraryDirectory == null)
            {
                throw new FileNotFoundException("STAR-Fusion reference library was not generated prior to running STAR-Fusion.");
            }

            // results go next to the junction file
            string junctionDirectory = Path.GetDirectoryName(chimericOutJunction);
            string junctionStem      = Path.GetFileNameWithoutExtension(chimericOutJunction);
            OutputDirectoryPath = Path.Combine(junctionDirectory, junctionStem + "FusionAnalysis");
            Directory.CreateDirectory(OutputDirectoryPath);

            string scratchDirectory = Path.Combine(analysisDirectory, "_STARFusionTmp");
            Directory.CreateDirectory(scratchDirectory);

            string options =
                " --examine_coding_effect" +
                $" --CPU {threads}" +
                $" --output_dir {WrapperUtility.ConvertWindowsPath(OutputDirectoryPath)}" +
                $" --genome_lib_dir {WrapperUtility.ConvertWindowsPath(ReferenceLibraryDirectory)}" +
                $" --chimeric_junction {WrapperUtility.ConvertWindowsPath(chimericOutJunction)}" +
                $" --tmpdir {WrapperUtility.ConvertWindowsPath(scratchDirectory)}";

            var commands = new List<string>();
            commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));
            commands.Add(StarFusionDirectoryName + "/STAR-Fusion " + options);
            return commands;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Builds the shell commands that align reads with STAR into a coordinate-sorted BAM, index it,
        /// remove duplicate reads (a step that's recommended for variant calling) and index the deduped BAM.
        /// Note: fastqs must have \n line endings, not \r\n.
        /// </summary>
        /// <param name="spritzDirectory">Passed to WrapperUtility.ChangeToToolsDirectoryCommand to build the first command.</param>
        /// <param name="threads">Value given to --runThreadN and to the dedup command.</param>
        /// <param name="genomeDir">Value given to --genomeDir.</param>
        /// <param name="fastqFiles">Read files given to --readFilesIn; a .gz or .bz2 extension adds a --readFilesCommand option.</param>
        /// <param name="outprefix">Value given to --outFileNamePrefix; also the prefix of the BAM files that are checked and indexed.</param>
        /// <param name="overwriteStarAlignment">When true, the "output already exists" guards are replaced by empty strings so both steps always run.</param>
        /// <param name="strandSpecific">Not used in this method body.</param>
        /// <param name="genomeLoad">Value given to --genomeLoad.</param>
        /// <returns>Ten entries, some of which may be empty strings, in execution order.</returns>
        public static List <string> AlignRNASeqReadsForVariantCalling(string spritzDirectory, int threads, string genomeDir, string[] fastqFiles,
                                                                      string outprefix, bool overwriteStarAlignment, bool strandSpecific = true, STARGenomeLoadOption genomeLoad = STARGenomeLoadOption.NoSharedMemory)
        {
            string readsArgument = string.Join(" ", fastqFiles.Select(f => WrapperUtility.ConvertWindowsPath(f)));

            // gzip takes precedence over bzip2 when both kinds of file are present
            string decompressOption = "";
            if (fastqFiles.Any(f => Path.GetExtension(f) == ".gz"))
            {
                decompressOption = " --readFilesCommand zcat -c";
            }
            else if (fastqFiles.Any(f => Path.GetExtension(f) == ".bz2"))
            {
                decompressOption = " --readFilesCommand bzip2 -c";
            }

            string starOptions =
                $" --genomeLoad {genomeLoad}" +
                " --runMode alignReads" +
                $" --runThreadN {threads}" +
                $" --genomeDir {WrapperUtility.ConvertWindowsPath(genomeDir)}" +
                $" --readFilesIn {readsArgument}" +
                " --outSAMtype BAM SortedByCoordinate" +
                " --outBAMcompression 10" +
                $" --limitBAMsortRAM {Process.GetCurrentProcess().VirtualMemorySize64}" +
                $" --outFileNamePrefix {WrapperUtility.ConvertWindowsPath(outprefix)}" +

                // chimeric junction settings (currently disabled):
                //   --chimSegmentMin 12, --chimJunctionOverhangMin 12, --alignSJDBoverhangMin 10,
                //   --alignMatesGapMax 100000, --alignIntronMax 100000, --chimSegmentReadGapMax 3,
                //   --alignSJstitchMismatchNmax 5 -1 5 5

                // stringtie parameters
                " --outSAMstrandField intronMotif" +            // adds XS tag to all alignments that contain a splice junction
                " --outFilterIntronMotifs RemoveNoncanonical" + // for cufflinks

                // gatk parameters
                " --outSAMattrRGline ID:1 PU:platform  PL:illumina SM:sample LB:library" + // this could shorten the time for samples that aren't multiplexed in preprocessing for GATK
                " --outSAMmapqUnique 60" +                                                 // this is used to ensure compatibility with GATK without having to use the GATK hacks
                decompressOption;                                                          // note in the future, two sets of reads can be comma separated here, and the RGline can also be comma separated to distinguish them later

            string sortedBam           = outprefix + SortedBamFileSuffix;
            string dedupedBam          = outprefix + DedupedBamFileSuffix;
            string sortedBamConverted  = WrapperUtility.ConvertWindowsPath(sortedBam);
            string dedupedBamConverted = WrapperUtility.ConvertWindowsPath(dedupedBam);

            // the guards disappear (become empty entries) when overwriting is requested
            string alignGuardOpen = overwriteStarAlignment ? "" : "if [[ ( ! -f " + sortedBamConverted + " || ! -s " + sortedBamConverted + " ) ]]; then";
            string dedupGuardOpen = overwriteStarAlignment ? "" : "if [[ ( ! -f " + dedupedBamConverted + " || ! -s " + dedupedBamConverted + " ) ]]; then";
            string guardClose     = overwriteStarAlignment ? "" : "fi";

            var commands = new List<string>();
            commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));

            // alignment
            commands.Add(alignGuardOpen);
            commands.Add("  STAR" + starOptions);
            commands.Add(guardClose);
            commands.Add(SamtoolsWrapper.IndexBamCommand(sortedBamConverted));

            // duplicate removal
            commands.Add(dedupGuardOpen);
            commands.Add("  " + StarDedupCommand(threads, sortedBam, outprefix + Path.GetFileNameWithoutExtension(SortedBamFileSuffix)));
            commands.Add(guardClose);
            commands.Add(SamtoolsWrapper.IndexBamCommand(dedupedBamConverted));

            // these existence checks happen while the commands are being built, not when the script runs
            bool removeGenome = File.Exists(outprefix + BamFileSuffix) && File.Exists(dedupedBam) && genomeLoad == STARGenomeLoadOption.LoadAndRemove;
            commands.Add(removeGenome ? "STAR --genomeLoad " + STARGenomeLoadOption.Remove.ToString() : "");
            return commands;
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Builds a bash command that concatenates a set of VCF files with vcf-concat.
 /// The command only runs when the concatenated file is missing or empty.
 /// The output path (outPrefix + ".concat.vcf") is stored in VcfConcatenatedPath.
 /// </summary>
 /// <param name="spritzDirectory">Not used by this method</param>
 /// <param name="vcfInputs">Windows-formatted VCF paths</param>
 /// <param name="outPrefix">Prefix of the output path; ".concat.vcf" is appended</param>
 /// <returns>command to run vcf-concat on the given VCF files</returns>
 public string Concatenate(string spritzDirectory, IEnumerable <string> vcfInputs, string outPrefix)
 {
     VcfConcatenatedPath = outPrefix + ".concat.vcf";

     string concatenatedVcf = WrapperUtility.ConvertWindowsPath(VcfConcatenatedPath);
     string inputVcfs = string.Join(" ", vcfInputs.Select(vcf => WrapperUtility.ConvertWindowsPath(vcf)));

     // skip the work when a non-empty output is already present
     string guard = $"if [ ! -f {concatenatedVcf} ] || [  ! -s {concatenatedVcf} ]; then ";
     string concat = $"vcf-concat {inputVcfs} > {concatenatedVcf}";
     return guard + concat + "; fi";
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Builds a bash command that extracts the named sequences from a FASTA file using samtools faidx.
 /// The command is skipped when the output FASTA already exists.
 /// </summary>
 /// <param name="inputFasta">FASTA file to query</param>
 /// <param name="sequenceNames">Names of the sequences to extract; none may contain a space</param>
 /// <param name="outputFasta">File that the extracted sequences are redirected into</param>
 /// <returns>bash command string</returns>
 /// <exception cref="ArgumentException">Thrown when any sequence name contains a space</exception>
 public static string GetSequencesFromFasta(string inputFasta, IEnumerable <string> sequenceNames, string outputFasta)
 {
     // Materialize once so that a lazily-evaluated sequence is not enumerated several times
     List <string> names = sequenceNames.ToList();

     if (names.Any(name => name.Contains(" ")))
     {
         // Names are space-separated on the command line, so a space would split one query into two
         throw new ArgumentException("A sequence name query had a space in it; this is not supported: " + string.Join(",", names) + ".");
     }
     return("if [ ! -f " + WrapperUtility.ConvertWindowsPath(outputFasta) + " ]; then samtools faidx " + WrapperUtility.ConvertWindowsPath(inputFasta) +
            " " + string.Join(" ", names) + " > " + WrapperUtility.ConvertWindowsPath(outputFasta) + "; fi");
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Filters a gene model file with grep, keeping only lines that start with '#'
        /// or with the FriendlyName of one of the genome's chromosomes.
        /// The script is generated, run, and waited on before this method returns.
        /// </summary>
        /// <param name="analysisDirectory">Directory used to locate the generated FilterGeneModel.bash script</param>
        /// <param name="geneModelGtfOrGff">Gene model file to filter</param>
        /// <param name="genome">Genome whose chromosome names select the lines to keep</param>
        /// <param name="filteredGeneModel">Path of the filtered file: same folder and name as the input, with ".filtered" inserted before the extension</param>
        public static void FilterGeneModel(string analysisDirectory, string geneModelGtfOrGff, Genome genome, out string filteredGeneModel)
        {
            // grep alternation of line-start anchors: "^<name1>\|^<name2>\|...\|^#"
            const string alternation = @"\|^";
            var linePrefixes = genome.Chromosomes
                .Select(chromosome => chromosome.FriendlyName)
                .Concat(new[] { "#" });
            string grepQuery = "\"^" + string.Join(alternation, linePrefixes) + "\"";

            string inputFolder = Path.GetDirectoryName(geneModelGtfOrGff);
            string inputName = Path.GetFileNameWithoutExtension(geneModelGtfOrGff);
            filteredGeneModel = Path.Combine(inputFolder, inputName) + ".filtered" + Path.GetExtension(geneModelGtfOrGff);

            string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "FilterGeneModel.bash");
            string grepCommand = "grep " + grepQuery + " " + WrapperUtility.ConvertWindowsPath(geneModelGtfOrGff) + " > " + WrapperUtility.ConvertWindowsPath(filteredGeneModel);

            WrapperUtility.GenerateAndRunScript(scriptPath, new List <string> { grepCommand }).WaitForExit();
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Builds the bash commands that prepare a BAM file for GATK:
        /// indexes the genome fasta, adds a read group while coordinate-sorting (AddOrReplaceReadGroups),
        /// marks duplicates (MarkDuplicates), indexes the resulting BAMs, and removes the intermediate grouped BAM.
        /// I'm just using one read group, so grouping is more a formality.
        /// Sets PreparedBamPath to the duplicate-marked BAM path.
        /// </summary>
        /// <param name="spritzDirectory">Spritz directory; a "tmp" subdirectory is created in it immediately for GATK temporary files</param>
        /// <param name="analysisDirectory">Not used by this method</param>
        /// <param name="threads">Not used by this method</param>
        /// <param name="bam">Input BAM file; all outputs are written next to it</param>
        /// <param name="genomeFasta">Genome fasta to index and build a sequence dictionary for</param>
        /// <param name="reference">Not used by this method</param>
        /// <returns>The bash commands; they are returned to the caller rather than run here</returns>
        public List <string> PrepareBamAndFasta(string spritzDirectory, string analysisDirectory, int threads, string bam, string genomeFasta, string reference)
        {
            string sortedCheckPath         = Path.Combine(Path.GetDirectoryName(bam), Path.GetFileNameWithoutExtension(bam) + ".headerSorted");
            string readGroupedCheckfile    = Path.Combine(Path.GetDirectoryName(bam), Path.GetFileNameWithoutExtension(bam) + ".headerReadGrouped");
            string sortedBam               = Path.Combine(Path.GetDirectoryName(bam), Path.GetFileNameWithoutExtension(bam) + ".sorted.bam");
            string groupedBamPath          = Path.Combine(Path.GetDirectoryName(sortedBam), Path.GetFileNameWithoutExtension(sortedBam) + ".grouped.bam");
            string markedDuplicatesBamPath = Path.Combine(Path.GetDirectoryName(groupedBamPath), Path.GetFileNameWithoutExtension(groupedBamPath) + ".marked.bam");
            string markedDuplicateMetrics  = Path.Combine(Path.GetDirectoryName(groupedBamPath), Path.GetFileNameWithoutExtension(groupedBamPath) + ".marked.metrics");

            string tmpDir = Path.Combine(spritzDirectory, "tmp");

            Directory.CreateDirectory(tmpDir);

            // bash-formatted paths that are reused across several commands
            string bashBam        = WrapperUtility.ConvertWindowsPath(bam);
            string bashGroupedBam = WrapperUtility.ConvertWindowsPath(groupedBamPath);
            string bashMarkedBam  = WrapperUtility.ConvertWindowsPath(markedDuplicatesBamPath);
            string bashTmpDir     = WrapperUtility.ConvertWindowsPath(tmpDir);

            List <string> commands = new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),

                SamtoolsWrapper.GenomeFastaIndexCommand(genomeFasta),
                GenomeDictionaryIndexCommand(genomeFasta),

                // record whether the header declares coordinate sorting / read groups; these files are not read by this method
                "samtools view -H " + bashBam + " | grep SO:coordinate > " + WrapperUtility.ConvertWindowsPath(sortedCheckPath),
                "samtools view -H " + bashBam + " | grep '^@RG' > " + WrapperUtility.ConvertWindowsPath(readGroupedCheckfile),

                // group and sort (note, using picard-tools works, but picard.jar somehow is truncating the BAM files)
                // skipped when either the grouped BAM or the final marked BAM is already present and non-empty
                "if [[ ( ! -f " + bashGroupedBam + " || ! -s " + bashGroupedBam + " ) && " +
                " ( ! -f " + bashMarkedBam + " || ! -s " + bashMarkedBam + " ) ]]; then " +
                Gatk(Workers, 2) +
                " AddOrReplaceReadGroups" +
                " -PU platform  -PL illumina -SM sample -LB library" +
                " -I " + bashBam +
                " -O " + bashGroupedBam +
                " -SO coordinate" +
                " --TMP_DIR " + bashTmpDir +
                "; fi",
                // NOTE(review): the path is passed unconverted here, while other callers pass a converted path — confirm which IndexBamCommand expects
                SamtoolsWrapper.IndexBamCommand(groupedBamPath),

                // mark duplicates (AS means assume sorted; note, using picard-tools works, but picard.jar somehow is truncating the BAM files)
                "if [[ ( ! -f " + bashMarkedBam + " || ! -s " + bashMarkedBam + " ) ]]; then " +
                Gatk(Workers) +
                " MarkDuplicates" +     // formerly picard
                " -I " + bashGroupedBam +
                " -O " + bashMarkedBam +
                " -M " + WrapperUtility.ConvertWindowsPath(markedDuplicateMetrics) +
                " --TMP_DIR " + bashTmpDir +
                " -AS true" +
                "; fi",
                SamtoolsWrapper.IndexBamCommand(markedDuplicatesBamPath),

                // clean up: only drop the intermediate grouped BAM once the marked BAM exists and is non-empty;
                // -f keeps reruns quiet when the grouped BAM was already removed
                "if [[ -f " + bashMarkedBam + " && -s " + bashMarkedBam + " ]]; then " +
                "rm -f " + bashGroupedBam + "; fi",
            };

            PreparedBamPath = markedDuplicatesBamPath;
            return(commands);
        }
Ejemplo n.º 18
0
        // see here for how to generate them from scratch: http://lab.loman.net/2012/11/16/how-to-get-snpeff-working-with-bacterial-genomes-from-ncbi/
        /// <summary>
        /// Ensures the SnpEff database list has been downloaded and that snpEff.config has entries for the
        /// database matching <paramref name="reference"/>, including mitochondrial codon tables.
        /// Returns immediately when both the database list and a matching database directory already exist.
        /// Sets DatabaseListPath.
        /// </summary>
        /// <param name="spritzDirectory">Spritz directory containing Tools/SnpEff</param>
        /// <param name="analysisDirectory">Directory used to locate the generated bash scripts</param>
        /// <param name="reference">Reference name; matched case-insensitively as a prefix of SnpEff database names</param>
        /// <exception cref="ArgumentException">Thrown when no entry in the downloaded database list starts with <paramref name="reference"/></exception>
        public void DownloadSnpEffDatabase(string spritzDirectory, string analysisDirectory, string reference)
        {
            DatabaseListPath = Path.Combine(spritzDirectory, "snpEffDatabases.txt");

            // check for existing list and database
            bool   databaseListExists = File.Exists(DatabaseListPath);
            string databaseDirectory  = Path.Combine(spritzDirectory, "Tools", "SnpEff", "data");

            string[] existingDatabases = Directory.Exists(databaseDirectory) ? Directory.GetDirectories(databaseDirectory) : new string[0];
            // same culture-invariant prefix match as used on the database list below
            bool     databaseExists    = existingDatabases.Any(d => Path.GetFileName(d).StartsWith(reference, true, CultureInfo.InvariantCulture));

            if (databaseListExists && databaseExists)
            {
                return;
            }

            // download database list
            string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "SnpEffDatabaseDownloadList.bash");

            WrapperUtility.GenerateAndRunScript(scriptPath, new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "echo \"Downloading list of SnpEff references\"",
                SnpEff(Workers) + " databases > " + WrapperUtility.ConvertWindowsPath(DatabaseListPath),
                WrapperUtility.EnsureClosedFileCommands(DatabaseListPath)
            }).WaitForExit();

            // the database name is the first tab-separated column of each line
            string snpeffReference = File.ReadLines(DatabaseListPath)
                .Select(line => line.Split('\t')[0].TrimEnd())
                .FirstOrDefault(d => d.StartsWith(reference, true, CultureInfo.InvariantCulture));

            if (snpeffReference == null)
            {
                // fail with a clear message instead of a NullReferenceException while building the config lines
                throw new ArgumentException("No SnpEff database starting with \"" + reference + "\" was found in " + DatabaseListPath + ".", nameof(reference));
            }

            string snpEffConfigPath = WrapperUtility.ConvertWindowsPath(Path.Combine(spritzDirectory, "Tools", "SnpEff", "snpEff.config"));

            // download database (it downloads automatically now, with more feedback), but still need the mitochondrial references
            scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "SnpEffDatabaseDownload.bash");
            WrapperUtility.GenerateAndRunScript(scriptPath, new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "echo \"\n# " + snpeffReference + "\" >> " + snpEffConfigPath,
                "echo \"" + snpeffReference + ".genome : Human genome " + snpeffReference.Split('.')[0] + " using RefSeq transcripts\" >> " + snpEffConfigPath,
                "echo \"" + snpeffReference + ".reference : ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/\" >> " + snpEffConfigPath,
                "echo \"\t" + snpeffReference + ".M.codonTable : Vertebrate_Mitochondrial\" >> " + snpEffConfigPath,
                "echo \"\t" + snpeffReference + ".MT.codonTable : Vertebrate_Mitochondrial\" >> " + snpEffConfigPath,
            }).WaitForExit();
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Builds the bash command that creates a sequence dictionary (.dict) next to the genome fasta
        /// with GATK CreateSequenceDictionary; many GATK tools use this dictionary.
        /// The command is skipped when the dictionary file already exists.
        /// </summary>
        /// <param name="genomeFastaPath">Genome fasta; the dictionary gets the same folder and base name with a ".dict" extension</param>
        /// <returns>bash command string</returns>
        private string GenomeDictionaryIndexCommand(string genomeFastaPath)
        {
            string fastaFolder = Path.GetDirectoryName(genomeFastaPath);
            string dictionaryName = Path.GetFileNameWithoutExtension(genomeFastaPath) + ".dict";
            string dictionaryPath = Path.Combine(fastaFolder, dictionaryName);

            string bashDictionary = WrapperUtility.ConvertWindowsPath(dictionaryPath);
            var gatk = Gatk(Workers); // formerly picard
            string bashFasta = WrapperUtility.ConvertWindowsPath(genomeFastaPath);

            return $"if [ ! -f {bashDictionary} ]; then {gatk} CreateSequenceDictionary -R {bashFasta} -O {bashDictionary}; fi";
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Builds the bash commands that drop transcripts with a zero abundance prediction
 /// (lines containing FPKM "0.0000000000") from a transcript GTF.
 /// The grep is skipped when a non-empty filtered file already exists.
 /// </summary>
 /// <param name="spritzDirectory">Spritz directory, used to change into the tools directory</param>
 /// <param name="transcriptGtfPath">Transcript GTF to filter</param>
 /// <param name="filteredTranscriptGtfPath">Path of the filtered GTF: same folder and name as the input, with ".filtered" inserted before the extension</param>
 /// <returns>The bash commands</returns>
 public static List <string> RemoveZeroAbundanceCufflinksPredictionsCommand(string spritzDirectory, string transcriptGtfPath, out string filteredTranscriptGtfPath)
 {
     string gtfFolder = Path.GetDirectoryName(transcriptGtfPath);
     string gtfName = Path.GetFileNameWithoutExtension(transcriptGtfPath);
     filteredTranscriptGtfPath = Path.Combine(gtfFolder, gtfName) + ".filtered" + Path.GetExtension(transcriptGtfPath);

     List <string> commands = new List <string>();
     commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));
     commands.Add($"echo \"Removing zero-abundance transcripts from {transcriptGtfPath}\"");

     string bashFiltered = WrapperUtility.ConvertWindowsPath(filteredTranscriptGtfPath);
     string bashInput = WrapperUtility.ConvertWindowsPath(transcriptGtfPath);
     commands.Add($"if [[ ! -f {bashFiltered} || ! -s {bashFiltered} ]]; then grep -v 'FPKM \"0.0000000000\"' {bashInput} > {bashFiltered}; fi");

     return commands;
 }
Ejemplo n.º 21
0
 /// <summary>
 /// Builds the bash command that removes all indels from a VCF file with vcftools.
 /// The command only runs when the output file is missing or empty.
 /// The output path (input name + ".NoIndels.vcf", same folder) is stored in VcfWithoutIndelsPath.
 /// </summary>
 /// <param name="spritzDirectory">Not used by this method</param>
 /// <param name="vcfPath">VCF file to filter</param>
 /// <param name="keepInfo">When true, adds --recode-INFO-all</param>
 /// <param name="applyFilter">When true, adds --remove-filtered-all</param>
 /// <returns>bash command to run vcftools to remove all indels from a VCF file</returns>
 public string RemoveAllIndels(string spritzDirectory, string vcfPath, bool keepInfo, bool applyFilter)
 {
     string vcfFolder = Path.GetDirectoryName(vcfPath);
     string vcfName = Path.GetFileNameWithoutExtension(vcfPath);
     VcfWithoutIndelsPath = Path.Combine(vcfFolder, vcfName) + ".NoIndels.vcf";

     string bashOutput = WrapperUtility.ConvertWindowsPath(VcfWithoutIndelsPath);
     string bashInput = WrapperUtility.ConvertWindowsPath(vcfPath);

     string filterOption = "";
     if (applyFilter)
     {
         filterOption = " --remove-filtered-all ";
     }
     string infoOption = "";
     if (keepInfo)
     {
         infoOption = " --recode-INFO-all ";
     }

     string vcftools = $"vcftools  --remove-indels --vcf {bashInput} --recode {filterOption}{infoOption} --stdout > {bashOutput}";
     return $"if [ ! -f {bashOutput} ] || [  ! -s {bashOutput} ]; then {vcftools}; fi";
 }
Ejemplo n.º 22
0
 /// <summary>
 /// Builds the bash command that filters a VCF by mean genotype depth with vcftools (--min-meanDP).
 /// The command only runs when the output file is missing or empty.
 /// The output path (input name + ".DPFilter.vcf", same folder) is stored in VcfDepthFilteredPath.
 /// </summary>
 /// <param name="spritzDirectory">Not used by this method</param>
 /// <param name="vcfPath">VCF file to filter</param>
 /// <param name="keepInfo">When true, adds --recode-INFO-all</param>
 /// <param name="minDepth">Value passed to --min-meanDP</param>
 /// <returns>bash command string</returns>
 public string AverageGenotypeDepthFilter(string spritzDirectory, string vcfPath, bool keepInfo, float minDepth)
 {
     VcfDepthFilteredPath = Path.Combine(Path.GetDirectoryName(vcfPath), Path.GetFileNameWithoutExtension(vcfPath)) + ".DPFilter.vcf";

     // Format with the invariant culture so the decimal separator is always '.',
     // regardless of the machine's locale (e.g. "2.5" rather than "2,5")
     string minDepthArgument = minDepth.ToString(System.Globalization.CultureInfo.InvariantCulture);

     return
         ("if [ ! -f " + WrapperUtility.ConvertWindowsPath(VcfDepthFilteredPath) + " ] || [ " + " ! -s " + WrapperUtility.ConvertWindowsPath(VcfDepthFilteredPath) + " ]; then " +
          "vcftools " +
          " --vcf " + WrapperUtility.ConvertWindowsPath(vcfPath) +
          " --min-meanDP " + minDepthArgument +
          " --recode " +
          (keepInfo ? " --recode-INFO-all " : "") +
          " --stdout > " + WrapperUtility.ConvertWindowsPath(VcfDepthFilteredPath) +
          "; fi");
 }
Ejemplo n.º 23
0
        /// <summary>
        /// Builds the STAR command that removes duplicates from an existing BAM file
        /// (--runMode inputAlignmentsFromBAM with --bamRemoveDuplicatesType UniqueIdentical).
        /// </summary>
        /// <param name="threads">Value passed to --runThreadN</param>
        /// <param name="inputBamPath">BAM file passed to --inputBAMfile</param>
        /// <param name="outBamPath">Value passed to --outFileNamePrefix</param>
        /// <returns>bash command string</returns>
        public static string StarDedupCommand(int threads, string inputBamPath, string outBamPath)
        {
            // The sort RAM limit is taken from this process's virtual memory size;
            // dispose the Process object once the value has been read
            string bamSortRam;
            using (Process currentProcess = Process.GetCurrentProcess())
            {
                bamSortRam = currentProcess.VirtualMemorySize64.ToString();
            }

            string dedupArguments =
                " --runMode inputAlignmentsFromBAM" +
                " --bamRemoveDuplicatesType UniqueIdentical" + // this could shorten the time for samples that aren't multiplexed, too; might only work with sortedBAM input from --inputBAMfile
                " --limitBAMsortRAM " + bamSortRam +
                " --runThreadN " + threads.ToString() +
                " --outBAMcompression 10" +
                " --inputBAMfile " + WrapperUtility.ConvertWindowsPath(inputBamPath) +
                " --outFileNamePrefix " + WrapperUtility.ConvertWindowsPath(outBamPath);

            return("STAR" + dedupArguments);
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Subsets a BAM file to a genome region with GATK PrintReads. Useful for testing new methods.
 /// Generates and runs SubsetBam.bash and waits for it to finish.
 /// </summary>
 /// <param name="spritzDirectory">Spritz directory, used to change into the tools directory</param>
 /// <param name="analysisDirectory">Directory used to locate the generated script</param>
 /// <param name="threads">Value passed to --num_threads</param>
 /// <param name="bam">Input BAM file</param>
 /// <param name="genomeFasta">Reference fasta passed to -R</param>
 /// <param name="genomeRegion">Region passed to -L, inserted into the command as given</param>
 /// <param name="outputBam">Output BAM file</param>
 public void SubsetBam(string spritzDirectory, string analysisDirectory, int threads, string bam, string genomeFasta, string genomeRegion, string outputBam)
 {
     string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "SubsetBam.bash");

     List <string> script = new List <string>();
     script.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));

     // NOTE(review): --num_threads and lowercase -o look like GATK3-style options, while other
     // commands in this wrapper use GATK4-style -O — confirm against the GATK version that Gatk() launches
     string printReads = Gatk(Workers) + " PrintReads";
     printReads += " --num_threads " + threads.ToString();
     printReads += " -R " + WrapperUtility.ConvertWindowsPath(genomeFasta);
     printReads += " -I " + WrapperUtility.ConvertWindowsPath(bam);
     printReads += " -o " + WrapperUtility.ConvertWindowsPath(outputBam);
     printReads += " -L " + genomeRegion;
     script.Add(printReads);

     WrapperUtility.GenerateAndRunScript(scriptPath, script).WaitForExit();
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Builds a HISAT2 index for a genome fasta with hisat2-build, unless IndexExists reports one already.
 /// When a build is needed, Hisat2Build.bash is generated, run, and waited on.
 /// </summary>
 /// <param name="spritzDirectory">Spritz directory, used to change into the tools directory</param>
 /// <param name="analysisDirectory">Directory used to locate the generated script</param>
 /// <param name="genomeFasta">Genome fasta to index</param>
 /// <param name="IndexPrefix">Index prefix: the fasta path without its extension</param>
 public static void GenerateIndex(string spritzDirectory, string analysisDirectory, string genomeFasta, out string IndexPrefix)
 {
     string fastaFolder = Path.GetDirectoryName(genomeFasta);
     string fastaName = Path.GetFileNameWithoutExtension(genomeFasta);
     IndexPrefix = Path.Combine(fastaFolder, fastaName);

     if (!IndexExists(genomeFasta))
     {
         string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "Hisat2Build.bash");
         List <string> script = new List <string>();
         script.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));
         script.Add($"hisat2-2.1.0/hisat2-build {WrapperUtility.ConvertWindowsPath(genomeFasta)} {WrapperUtility.ConvertWindowsPath(IndexPrefix)}");

         WrapperUtility.GenerateAndRunScript(scriptPath, script).WaitForExit();
     }
 }
Ejemplo n.º 26
0
 /// <summary>
 /// Obtains the known variant sites VCF with Ensembl chromosome names: calls DownloadUCSCKnownVariantSites,
 /// converts the result with ConvertVCFChromosomesUCSC2Ensembl, and (unless dryRun) indexes both VCFs
 /// with GATK IndexFeatureFile when their .idx files are missing.
 /// Sets EnsemblKnownSitesPath.
 /// </summary>
 /// <param name="spritzDirectory">Spritz directory; also used to locate the generated script</param>
 /// <param name="commonOnly">Forwarded to DownloadUCSCKnownVariantSites</param>
 /// <param name="reference">Forwarded to the download and conversion steps</param>
 /// <param name="dryRun">When true, the indexing script is not generated or run</param>
 /// <returns>EnsemblKnownSitesPath</returns>
 public string DownloadEnsemblKnownVariantSites(string spritzDirectory, bool commonOnly, string reference, bool dryRun)
 {
     DownloadUCSCKnownVariantSites(spritzDirectory, commonOnly, reference, dryRun);
     EnsemblKnownSitesPath = ConvertVCFChromosomesUCSC2Ensembl(spritzDirectory, UcscKnownSitesPath, reference, dryRun);

     if (dryRun)
     {
         return EnsemblKnownSitesPath;
     }

     // indexing is used for most GATK tools
     string scriptPath = WrapperUtility.GetAnalysisScriptPath(spritzDirectory, "IndexKnownVariantSites.bash");
     List <string> script = new List <string> { WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory) };
     foreach (string knownSitesVcf in new[] { UcscKnownSitesPath, EnsemblKnownSitesPath })
     {
         script.Add(
             "if [ ! -f " + WrapperUtility.ConvertWindowsPath(knownSitesVcf) + ".idx ]; then " +
             Gatk(Workers) + " IndexFeatureFile -F " + WrapperUtility.ConvertWindowsPath(knownSitesVcf) +
             "; fi");
     }
     WrapperUtility.GenerateAndRunScript(scriptPath, script).WaitForExit();

     return EnsemblKnownSitesPath;
 }
Ejemplo n.º 27
0
        /// <summary>
        /// Trims reads with skewer. Computes the trimmed output paths and log path from the first read file,
        /// then generates and runs Skewered.bash unless the trimmed files already exist or dryRun is set.
        /// </summary>
        /// <param name="spritzDirectory">Spritz directory; used to change into the tools directory and to locate the adapter file</param>
        /// <param name="analysisDirectory">Directory used to locate the generated script</param>
        /// <param name="threads">Value passed to skewer -t</param>
        /// <param name="qualityFilter">Value passed to skewer -q</param>
        /// <param name="readPaths">One (single-end) or two (paired-end) read files; entries after the second are ignored</param>
        /// <param name="dryRun">When true, only the output paths are computed</param>
        /// <param name="readTrimmedPaths">Array as long as readPaths; only the first two entries are ever assigned</param>
        /// <param name="log">Path of the skewer log, or "" when readPaths is empty</param>
        public static void Trim(string spritzDirectory, string analysisDirectory, int threads, int qualityFilter, string[] readPaths, bool dryRun, out string[] readTrimmedPaths, out string log)
        {
            log = "";
            readTrimmedPaths = new string[readPaths.Length];
            if (readPaths.Length <= 0)
            {
                return;
            }

            // For .gz input, strip the ".gz" so the remaining extension is removed when naming outputs
            bool compressed = Path.GetExtension(readPaths[0]) == ".gz";
            string[] uncompressedReadPaths = readPaths;
            if (compressed)
            {
                uncompressedReadPaths = readPaths
                    .Select(x => Path.Combine(Path.GetDirectoryName(x), Path.GetFileNameWithoutExtension(x)))
                    .ToArray();
            }

            // All outputs are named after the first read file
            string outputFolder = Path.GetDirectoryName(uncompressedReadPaths[0]);
            string outputName = Path.GetFileNameWithoutExtension(uncompressedReadPaths[0]);
            bool paired = uncompressedReadPaths.Length > 1;

            // Only create paired entry if paired input, and ignore inputs after second index
            readTrimmedPaths[0] = Path.Combine(outputFolder, outputName + "-trimmed" + (paired ? "-pair1" : "") + ".fastq");
            if (paired)
            {
                readTrimmedPaths[1] = Path.Combine(outputFolder, outputName + "-trimmed-pair2.fastq");
            }
            log = Path.Combine(outputFolder, outputName + "-trimmed.log");

            bool alreadyTrimmed = File.Exists(readTrimmedPaths[0]) && (readPaths.Length == 1 || File.Exists(readTrimmedPaths[1]));
            if (alreadyTrimmed || dryRun)
            {
                return;
            }

            string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "Skewered.bash");

            List <string> script = new List <string>();
            script.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));

            string skewer = "skewer-0.2.2/skewer";
            skewer += " -q " + qualityFilter.ToString();
            skewer += " -o " + WrapperUtility.ConvertWindowsPath(Path.Combine(outputFolder, outputName));
            skewer += " -t " + threads.ToString();
            skewer += " -x " + WrapperUtility.ConvertWindowsPath(Path.Combine(spritzDirectory, "Tools", "BBMap", "resources", "adapters.fa"));
            skewer += " " + WrapperUtility.ConvertWindowsPath(readPaths[0]);
            if (readPaths.Length > 1)
            {
                skewer += " " + WrapperUtility.ConvertWindowsPath(readPaths[1]);
            }
            script.Add(skewer);

            WrapperUtility.GenerateAndRunScript(scriptPath, script).WaitForExit();
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Downloads dbSNP reference VCF file if it doesn't exist.
        /// When a download is needed, DownloadUcscVariants.bash is generated, run, and waited on:
        /// it fetches TargetFileLocation with wget, unzips it, and moves it to UcscKnownSitesPath.
        /// </summary>
        /// <param name="spritzDirectory">Spritz directory; the download runs from here and the script is located relative to it</param>
        /// <param name="commonOnly">Forwarded to KnownVariantSitesFileExists</param>
        /// <param name="reference">Forwarded to KnownVariantSitesFileExists</param>
        /// <param name="dryRun">When true, nothing is downloaded</param>
        /// <returns>UcscKnownSitesPath</returns>
        public string DownloadUCSCKnownVariantSites(string spritzDirectory, bool commonOnly, string reference, bool dryRun)
        {
            // NOTE(review): TargetFileLocation and the UcscKnownSites* paths are presumably set by this call — confirm
            bool knownSitesExists = KnownVariantSitesFileExists(spritzDirectory, commonOnly, reference);

            if (!knownSitesExists && !dryRun)
            {
                WrapperUtility.GenerateAndRunScript(WrapperUtility.GetAnalysisScriptPath(spritzDirectory, "DownloadUcscVariants.bash"), new List <string>
                {
                    "cd " + WrapperUtility.ConvertWindowsPath(spritzDirectory),
                    "wget " + TargetFileLocation,
                    "gunzip " + WrapperUtility.ConvertWindowsPath(UcscKnownSitesDownloadPath) + ".gz",
                    // gunzip already removes the archive on success, so use -f to avoid a spurious "No such file" failure
                    "rm -f " + WrapperUtility.ConvertWindowsPath(UcscKnownSitesDownloadPath) + ".gz",
                    "mv " + WrapperUtility.ConvertWindowsPath(UcscKnownSitesDownloadPath) + " " + WrapperUtility.ConvertWindowsPath(UcscKnownSitesPath)
                }).WaitForExit();
            }
            return(UcscKnownSitesPath);
        }
Ejemplo n.º 29
0
 /// <summary>
 /// Bundles splice junctions from first-pass alignments into a single splice junction file for second-pass alignment.
 /// The awk step keeps rows whose fifth column is greater than zero and writes columns 1-3 plus the
 /// fourth column mapped to '.', '+' or '-'. Lines containing 'MT' are then removed, which is intended
 /// to exclude splice junctions for mitochondrial chromosome alignments.
 /// The command is skipped when the combined file already exists.
 /// </summary>
 /// <param name="spliceJunctionOuts">Splice junction files from the first pass; must not be empty</param>
 /// <param name="uniqueSuffix">Number inserted into the combined file name</param>
 /// <param name="spliceJunctionStarts">Path of the combined file, placed in the folder of the first input</param>
 /// <returns>The bash commands</returns>
 /// <exception cref="ArgumentException">Thrown when spliceJunctionOuts is empty</exception>
 public static List <string> ProcessFirstPassSpliceCommands(List <string> spliceJunctionOuts, int uniqueSuffix, out string spliceJunctionStarts)
 {
     if (spliceJunctionOuts.Count == 0)
     {
         throw new ArgumentException("STARWrapper.ProcessFirstPassSpliceCommands: No splice junctions detected for second-pass genome generation.");
     }

     string outputFolder = Path.GetDirectoryName(spliceJunctionOuts[0]);
     string combinedName = "combined" + uniqueSuffix.ToString() + "." + SpliceJunctionFileSuffix;
     spliceJunctionStarts = Path.Combine(outputFolder, combinedName);

     string guard = "if [ ! -f " + WrapperUtility.ConvertWindowsPath(spliceJunctionStarts) + " ]; then ";
     string awkProgram = "awk 'BEGIN {OFS=\"\t\"; strChar[0]=\".\"; strChar[1]=\"+\"; strChar[2]=\"-\";} {if($5>0){print $1,$2,$3,strChar[$4]}}' ";
     string inputFiles = string.Join(" ", spliceJunctionOuts.Select(f => WrapperUtility.ConvertWindowsPath(f)));
     string redirect = " | grep -v 'MT' >> " + WrapperUtility.ConvertWindowsPath(spliceJunctionStarts);

     List <string> commands = new List <string>();
     commands.Add(guard + awkProgram + inputFiles + redirect + "; fi");
     return commands;
 }
Ejemplo n.º 30
0
        /// <summary>
        /// Downloads Ensembl references for GRCh37 or GRCh38.
        ///
        /// Sets GenomeFastaPath, GtfGeneModelPath, Gff3GeneModelPath, and ProteinFastaPath;
        /// all four are set to "" when the reference is neither GRCh37 nor GRCh38.
        /// For GRCh37 the GFF3 path is the same as the GTF path.
        /// Each file is downloaded only when it is missing or empty.
        /// </summary>
        /// <param name="spritzDirectory">Not used by this method</param>
        /// <param name="targetDirectory">Directory the references are downloaded into; also used to locate the generated script</param>
        /// <param name="reference">"GRCh37" or "GRCh38" (case-insensitive)</param>
        /// <param name="dryRun">When true, only the paths are set and nothing is downloaded</param>
        public void DownloadReferences(string spritzDirectory, string targetDirectory, string reference, bool dryRun)
        {
            bool downloadGrch37 = string.Equals(reference, "GRCh37", StringComparison.CurrentCultureIgnoreCase);
            bool downloadGrch38 = string.Equals(reference, "GRCh38", StringComparison.CurrentCultureIgnoreCase);

            if (downloadGrch37)
            {
                GenomeFastaPath   = Path.Combine(targetDirectory, GRCh37PrimaryAssemblyFilename);
                GtfGeneModelPath  = Path.Combine(targetDirectory, GRCh37GtfGeneModelFilename);
                Gff3GeneModelPath = GtfGeneModelPath; // GRCh37 reuses the GTF in place of a GFF3
                ProteinFastaPath  = Path.Combine(targetDirectory, GRCh37ProteinFastaFilename);
            }
            else if (downloadGrch38)
            {
                GenomeFastaPath   = Path.Combine(targetDirectory, GRCh38PrimaryAssemblyFilename);
                GtfGeneModelPath  = Path.Combine(targetDirectory, GRCh38GtfGeneModelFilename);
                Gff3GeneModelPath = Path.Combine(targetDirectory, GRCh38Gff3GeneModelFilename);
                ProteinFastaPath  = Path.Combine(targetDirectory, GRCh38ProteinFastaFilename);
            }
            else
            {
                GenomeFastaPath   = "";
                GtfGeneModelPath  = "";
                Gff3GeneModelPath = "";
                ProteinFastaPath  = "";
            }

            if ((!downloadGrch37 && !downloadGrch38) || dryRun)
            {
                return;
            }

            var genomeFastaUrl   = downloadGrch38 ? GRCh38PrimaryAssemblyUrl : GRCh37PrimaryAssemblyUrl;
            var gtfGeneModelUrl  = downloadGrch38 ? GRCh38GtfGeneModelUrl : GRCh37GtfGeneModelUrl;
            var gff3GeneModelUrl = downloadGrch38 ? GRCh38Gff3GeneModelUrl : GRCh37GtfGeneModelUrl; // note GRCh37 calls the gtf url instead
            var proteinFastaUrl  = downloadGrch38 ? GRCh38ProteinFastaUrl : GRCh37ProteinFastaUrl;

            // The script changes into targetDirectory, so bare file names are used below
            string genomeFastaName   = Path.GetFileName(GenomeFastaPath);
            string gtfGeneModelName  = Path.GetFileName(GtfGeneModelPath);
            string gff3GeneModelName = Path.GetFileName(Gff3GeneModelPath);
            string proteinFastaName  = Path.GetFileName(ProteinFastaPath);

            // A failed "wget | gunzip" still creates an empty output file through the redirect,
            // so each guard also re-downloads when the file exists but is empty (-s)
            WrapperUtility.GenerateAndRunScript(WrapperUtility.GetAnalysisScriptPath(targetDirectory, "DownloadEnsemblReference.bash"), new List <string>
            {
                $"cd {WrapperUtility.ConvertWindowsPath(targetDirectory)}",
                $"if [ ! -f {genomeFastaName} ] || [ ! -s {genomeFastaName} ]; then wget -O - {genomeFastaUrl} | gunzip -c > {genomeFastaName}; fi",
                $"if [ ! -f {gtfGeneModelName} ] || [ ! -s {gtfGeneModelName} ]; then wget -O - {gtfGeneModelUrl} | gunzip -c > {gtfGeneModelName}; fi",
                $"if [ ! -f {gff3GeneModelName} ] || [ ! -s {gff3GeneModelName} ]; then wget -O - {gff3GeneModelUrl} | gunzip -c > {gff3GeneModelName}; fi",
                $"if [ ! -f {proteinFastaName} ] || [ ! -s {proteinFastaName} ]; then wget -O - {proteinFastaUrl} | gunzip -c > {proteinFastaName}; fi",
            }).WaitForExit();

            //Genome.WriteFasta(new Genome(genomeFastaPath).KaryotypicOrder(), genomeFastaPath); // todo: try this for ordering contigs before alignments; does gtf then need to be reordered?
        }