예제 #1
0
        /// <summary>
        /// Builds the list of bash commands for a SnpEff annotation run and records the expected
        /// output paths (annotated VCF, HTML report, gene summary, protein FASTA and protein XML)
        /// on this instance.
        /// </summary>
        /// <param name="spritzDirectory">Directory that contains the Tools/SnpEff/data folder.</param>
        /// <param name="reference">Case-insensitive prefix used to pick a database folder under Tools/SnpEff/data.</param>
        /// <param name="inputVcfPath">VCF file to annotate; every output path is derived from its directory and file name.</param>
        /// <param name="fromReference">When true, the input VCF and the redirect into the annotated VCF are left off the SnpEff command.</param>
        /// <returns>The commands to run, or an empty list when a non-empty annotated VCF already exists.</returns>
        public List <string> PrimaryVariantAnnotation(string spritzDirectory, string reference, string inputVcfPath, bool fromReference = false)
        {
            // All outputs share "<input directory>/<input name>.snpEffAnnotated" as their stem.
            string annotationStem = Path.Combine(Path.GetDirectoryName(inputVcfPath), Path.GetFileNameWithoutExtension(inputVcfPath)) + ".snpEffAnnotated";

            AnnotatedVcfPath          = annotationStem + ".vcf";
            HtmlReportPath            = annotationStem + ".html";
            AnnotatedGenesSummaryPath = annotationStem + ".genes.txt";
            VariantProteinFastaPath   = annotationStem + ".protein.fasta";
            VariantProteinXmlPath     = annotationStem + ".protein.xml";

            // Make sure the data folder exists before listing the databases it holds.
            string snpEffDataDirectory = Path.Combine(spritzDirectory, "Tools", "SnpEff", "data");
            Directory.CreateDirectory(snpEffDataDirectory);
            string[] databaseDirectories = Directory.GetDirectories(snpEffDataDirectory);

            // Nothing to do when a previous run already produced a non-empty annotated VCF.
            bool alreadyAnnotated = File.Exists(AnnotatedVcfPath) && new FileInfo(AnnotatedVcfPath).Length > 0;
            if (alreadyAnnotated)
            {
                return new List<string>();
            }

            List<string> commands = new List<string>();
            commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));

            // Assemble the SnpEff invocation piece by piece.
            string snpEffCommand = SnpEff(Workers) + " -v -stats " + WrapperUtility.ConvertWindowsPath(HtmlReportPath);
            snpEffCommand += " -fastaProt " + WrapperUtility.ConvertWindowsPath(VariantProteinFastaPath);
            snpEffCommand += " -xmlProt " + WrapperUtility.ConvertWindowsPath(VariantProteinXmlPath);

            // First database folder whose name starts with the reference; stays null when none matches,
            // in which case no database name is appended to the command.
            string matchingDatabase = null;
            foreach (string candidate in databaseDirectories)
            {
                if (Path.GetFileName(candidate).StartsWith(reference, true, null))
                {
                    matchingDatabase = candidate;
                    break;
                }
            }
            snpEffCommand += " " + Path.GetFileName(matchingDatabase);

            string convertedAnnotatedVcf = WrapperUtility.ConvertWindowsPath(AnnotatedVcfPath);
            if (!fromReference)
            {
                snpEffCommand += $" {WrapperUtility.ConvertWindowsPath(inputVcfPath)} > {convertedAnnotatedVcf}";
            }
            commands.Add(snpEffCommand);

            // ensure that the files get closed before continuing
            commands.Add(WrapperUtility.EnsureClosedFileCommands(convertedAnnotatedVcf));
            commands.Add(WrapperUtility.EnsureClosedFileCommands(WrapperUtility.ConvertWindowsPath(VariantProteinFastaPath)));
            commands.Add(WrapperUtility.EnsureClosedFileCommands(WrapperUtility.ConvertWindowsPath(VariantProteinXmlPath)));

            // remove the annotated VCF file if snpEff didn't work, e.g. if there was no VCF file to annotate
            commands.Add("if [[ ( -f " + convertedAnnotatedVcf + " && ! -s " + convertedAnnotatedVcf + " ) ]]; then");
            commands.Add("  rm " + convertedAnnotatedVcf);
            commands.Add("fi");

            return commands;
        }
예제 #2
0
        /// <summary>
        /// Writes the SnpEff database list to snpEffDatabases.txt under <paramref name="spritzDirectory"/>
        /// and appends configuration entries (including mitochondrial codon tables) for the database
        /// matching <paramref name="reference"/> to Tools/SnpEff/snpEff.config.
        /// Returns immediately when both the list file and a matching database folder already exist.
        /// </summary>
        /// <remarks>
        /// See here for how to generate databases from scratch:
        /// http://lab.loman.net/2012/11/16/how-to-get-snpeff-working-with-bacterial-genomes-from-ncbi/
        /// </remarks>
        /// <param name="spritzDirectory">Directory that contains the Tools/SnpEff folder.</param>
        /// <param name="analysisDirectory">Directory passed to <c>WrapperUtility.GetAnalysisScriptPath</c> for the generated scripts.</param>
        /// <param name="reference">Case-insensitive prefix of the SnpEff database name to configure.</param>
        /// <exception cref="System.ArgumentException">
        /// No entry in the database list starts with <paramref name="reference"/>.
        /// </exception>
        public void DownloadSnpEffDatabase(string spritzDirectory, string analysisDirectory, string reference)
        {
            DatabaseListPath = Path.Combine(spritzDirectory, "snpEffDatabases.txt");

            // check for existing list and database
            bool   databaseListExists = File.Exists(DatabaseListPath);
            string databaseDirectory  = Path.Combine(spritzDirectory, "Tools", "SnpEff", "data");

            string[] existingDatabases = Directory.Exists(databaseDirectory) ? Directory.GetDirectories(databaseDirectory) : new string[0];
            bool     databaseExists    = existingDatabases.Any(d => Path.GetFileName(d).StartsWith(reference, true, null));

            if (databaseListExists && databaseExists)
            {
                return;
            }

            // download database list
            string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "SnpEffDatabaseDownloadList.bash");

            WrapperUtility.GenerateAndRunScript(scriptPath, new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "echo \"Downloading list of SnpEff references\"",
                SnpEff(Workers) + " databases > " + WrapperUtility.ConvertWindowsPath(DatabaseListPath),
                // NOTE(review): the path is passed without ConvertWindowsPath here, unlike the redirect
                // target on the previous line -- TODO confirm the helper handles Windows paths itself.
                WrapperUtility.EnsureClosedFileCommands(DatabaseListPath)
            }).WaitForExit();

            // The database name is the first tab-separated column of each line in the list.
            string snpeffReference = File.ReadLines(DatabaseListPath)
                .Select(line => line.Split('\t')[0].TrimEnd())
                .FirstOrDefault(d => d.StartsWith(reference, true, CultureInfo.InvariantCulture));

            // Fail with a clear message instead of a NullReferenceException further down.
            if (snpeffReference == null)
            {
                throw new System.ArgumentException(
                    $"No SnpEff database whose name starts with \"{reference}\" was found in {DatabaseListPath}.",
                    nameof(reference));
            }

            // Every entry below is appended to the same config file.
            string configPath = WrapperUtility.ConvertWindowsPath(Path.Combine(spritzDirectory, "Tools", "SnpEff", "snpEff.config"));

            // download database (it downloads automatically now, with more feedback), but still need the mitochondrial references
            scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "SnpEffDatabaseDownload.bash");
            WrapperUtility.GenerateAndRunScript(scriptPath, new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "echo \"\n# " + snpeffReference + "\" >> " + configPath,
                "echo \"" + snpeffReference + ".genome : Human genome " + snpeffReference.Split('.')[0] + " using RefSeq transcripts\" >> " + configPath,
                "echo \"" + snpeffReference + ".reference : ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/\" >> " + configPath,
                "echo \"\t" + snpeffReference + ".M.codonTable : Vertebrate_Mitochondrial\" >> " + configPath,
                "echo \"\t" + snpeffReference + ".MT.codonTable : Vertebrate_Mitochondrial\" >> " + configPath,
            }).WaitForExit();
        }
예제 #3
0
        /// <summary>
        /// Runs RSeQC's inner_distance.py on a BAM file and returns the rounded average of the
        /// inner distances it reports.
        /// </summary>
        /// <param name="spritzDirectory">Directory passed to <c>WrapperUtility.ChangeToToolsDirectoryCommand</c>.</param>
        /// <param name="analysisDirectory">Directory passed to <c>WrapperUtility.GetAnalysisScriptPath</c> for the generated script.</param>
        /// <param name="bamPath">Input BAM file; output files are written next to it using its name as prefix.</param>
        /// <param name="geneModelPath">Gene model; anything without a .bed extension is first passed through <c>BEDOPSWrapper.GffOrGtf2Bed12</c>.</param>
        /// <param name="outputFiles">Receives the R plot, frequency table and distance table paths, in that order.</param>
        /// <returns>The average inner distance, rounded to the nearest integer.</returns>
        /// <exception cref="InvalidOperationException">
        /// The distance table contains no integer distance strictly between -250 and 250.
        /// </exception>
        public static int InferInnerDistance(string spritzDirectory, string analysisDirectory, string bamPath, string geneModelPath, out string[] outputFiles)
        {
            // Compare the extension case-insensitively so that e.g. ".BED" is not sent to the converter.
            if (!".bed".Equals(Path.GetExtension(geneModelPath), StringComparison.OrdinalIgnoreCase))
            {
                geneModelPath = BEDOPSWrapper.GffOrGtf2Bed12(spritzDirectory, analysisDirectory, geneModelPath);
            }

            // Shared prefix for every output file: "<bam directory>/<bam name without extension>".
            string outputPrefix      = Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath));
            string distanceTablePath = outputPrefix + InnerDistanceDistanceTableSuffix;

            outputFiles = new string[]
            {
                outputPrefix + InnerDistanceRPlotSuffix,
                outputPrefix + InnerDistanceFrequencyTableSuffix,
                distanceTablePath
            };

            WrapperUtility.GenerateAndRunScript(WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "InferInnerDistance.bash"), new List <string>
            {
                WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
                "python RSeQC-2.6.4/scripts/inner_distance.py" +
                " -i " + WrapperUtility.ConvertWindowsPath(bamPath) +          // input
                " -o " + WrapperUtility.ConvertWindowsPath(outputPrefix) +     // out prefix
                " -r " + WrapperUtility.ConvertWindowsPath(geneModelPath),     // gene model in BED format
                WrapperUtility.EnsureClosedFileCommands(outputFiles[0]),
                WrapperUtility.EnsureClosedFileCommands(outputFiles[1]),
                WrapperUtility.EnsureClosedFileCommands(outputFiles[2]),
            }).WaitForExit();

            List <int> distances = new List <int>();

            foreach (string dline in File.ReadLines(distanceTablePath))
            {
                // The distance is the second tab-separated column; skip lines that lack it
                // (e.g. blank lines) instead of failing with an index error.
                string[] fields = dline.Split('\t');
                if (fields.Length > 1 &&
                    int.TryParse(fields[1], out int distance) &&
                    distance < 250 && distance > -250)      // default settings for infer_distance
                {
                    distances.Add(distance);
                }
            }

            // Average() would throw the same exception type on an empty list, but without context.
            if (distances.Count == 0)
            {
                throw new InvalidOperationException(
                    $"No inner distances between -250 and 250 were found in {distanceTablePath}.");
            }

            return (int)Math.Round(distances.Average(), 0);
        }