/// <summary>
/// Builds the bash script lines that run SnpEff on a VCF file, and records the expected output
/// paths (annotated VCF, HTML report, gene summary, protein FASTA and protein XML) on this instance.
/// </summary>
/// <param name="spritzDirectory">Spritz directory; the SnpEff data folder under Tools/SnpEff/data is created if missing and scanned for databases.</param>
/// <param name="reference">Prefix, matched case-insensitively, used to pick the SnpEff database directory.</param>
/// <param name="inputVcfPath">Path of the VCF to annotate; every output path is derived from it.</param>
/// <param name="fromReference">When true, the input VCF and the output redirection are left off the SnpEff command.</param>
/// <returns>The script lines, or an empty list when a non-empty annotated VCF already exists.</returns>
public List<string> PrimaryVariantAnnotation(string spritzDirectory, string reference, string inputVcfPath, bool fromReference = false)
{
    // All outputs sit next to the input VCF and share the ".snpEffAnnotated" stem.
    string inputFolder = Path.GetDirectoryName(inputVcfPath);
    string inputStem = Path.GetFileNameWithoutExtension(inputVcfPath);
    string annotatedStem = Path.Combine(inputFolder, inputStem) + ".snpEffAnnotated";
    AnnotatedVcfPath = annotatedStem + ".vcf";
    HtmlReportPath = annotatedStem + ".html";
    AnnotatedGenesSummaryPath = annotatedStem + ".genes.txt";
    VariantProteinFastaPath = annotatedStem + ".protein.fasta";
    VariantProteinXmlPath = annotatedStem + ".protein.xml";

    string dataFolder = Path.Combine(spritzDirectory, "Tools", "SnpEff", "data");
    Directory.CreateDirectory(dataFolder);
    string[] databaseFolders = Directory.GetDirectories(dataFolder);

    // Nothing to do when a previous run already produced a non-empty annotated VCF.
    bool alreadyAnnotated = File.Exists(AnnotatedVcfPath) && new FileInfo(AnnotatedVcfPath).Length > 0;
    if (alreadyAnnotated)
    {
        return new List<string>();
    }

    List<string> commands = new List<string>();
    commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));

    // First database directory whose name starts with the reference; stays null when none matches.
    string matchingFolder = null;
    foreach (string folder in databaseFolders)
    {
        if (Path.GetFileName(folder).StartsWith(reference, true, null))
        {
            matchingFolder = folder;
            break;
        }
    }
    string databaseName = Path.GetFileName(matchingFolder);

    string annotatedVcf = WrapperUtility.ConvertWindowsPath(AnnotatedVcfPath);
    string proteinFasta = WrapperUtility.ConvertWindowsPath(VariantProteinFastaPath);
    string proteinXml = WrapperUtility.ConvertWindowsPath(VariantProteinXmlPath);

    string snpEffCommand =
        $"{SnpEff(Workers)} -v -stats {WrapperUtility.ConvertWindowsPath(HtmlReportPath)}" +
        $" -fastaProt {proteinFasta} -xmlProt {proteinXml} {databaseName}";
    if (!fromReference)
    {
        snpEffCommand += $" {WrapperUtility.ConvertWindowsPath(inputVcfPath)} > {annotatedVcf}";
    }
    commands.Add(snpEffCommand);

    // ensure that the files get closed before continuing
    commands.Add(WrapperUtility.EnsureClosedFileCommands(annotatedVcf));
    commands.Add(WrapperUtility.EnsureClosedFileCommands(proteinFasta));
    commands.Add(WrapperUtility.EnsureClosedFileCommands(proteinXml));

    // remove the annotated VCF file if snpEff didn't work, e.g. if there was no VCF file to annotate
    commands.Add("if [[ ( -f " + annotatedVcf + " && ! -s " + annotatedVcf + " ) ]]; then");
    commands.Add(" rm " + annotatedVcf);
    commands.Add("fi");

    return commands;
}
// see here for how to generate them from scratch: http://lab.loman.net/2012/11/16/how-to-get-snpeff-working-with-bacterial-genomes-from-ncbi/
/// <summary>
/// Writes the list of available SnpEff databases to snpEffDatabases.txt in the Spritz directory,
/// picks the first listed database whose name starts with <paramref name="reference"/>, and appends
/// genome, reference and mitochondrial codon-table entries for it to Tools/SnpEff/snpEff.config.
/// Returns without doing anything when both the list file and a matching database directory exist.
/// </summary>
/// <param name="spritzDirectory">Spritz directory containing Tools/SnpEff.</param>
/// <param name="analysisDirectory">Directory passed to WrapperUtility.GetAnalysisScriptPath for the generated scripts.</param>
/// <param name="reference">Prefix, matched case-insensitively, of the SnpEff database name.</param>
/// <exception cref="ArgumentException">No database in the downloaded list starts with <paramref name="reference"/>.</exception>
public void DownloadSnpEffDatabase(string spritzDirectory, string analysisDirectory, string reference)
{
    DatabaseListPath = Path.Combine(spritzDirectory, "snpEffDatabases.txt");

    // check for existing list and database
    bool databaseListExists = File.Exists(DatabaseListPath);
    string databaseDirectory = Path.Combine(spritzDirectory, "Tools", "SnpEff", "data");
    string[] existingDatabases = Directory.Exists(databaseDirectory) ? Directory.GetDirectories(databaseDirectory) : new string[0];
    bool databaseExists = existingDatabases.Any(d => Path.GetFileName(d).StartsWith(reference, true, null));
    if (databaseListExists && databaseExists)
    {
        return;
    }

    // download database list
    string scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "SnpEffDatabaseDownloadList.bash");
    WrapperUtility.GenerateAndRunScript(scriptPath, new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
        "echo \"Downloading list of SnpEff references\"",
        SnpEff(Workers) + " databases > " + WrapperUtility.ConvertWindowsPath(DatabaseListPath),
        WrapperUtility.EnsureClosedFileCommands(DatabaseListPath)
    }).WaitForExit();

    // The database name is the first tab-separated column of each line; take the first one matching the reference.
    string snpeffReference = File.ReadLines(DatabaseListPath)
        .Select(line => line.Split('\t')[0].TrimEnd())
        .FirstOrDefault(d => d.StartsWith(reference, true, CultureInfo.InvariantCulture));

    // Fail with a clear message instead of a NullReferenceException while building the config lines below.
    if (snpeffReference == null)
    {
        throw new ArgumentException(
            $"No SnpEff database starting with \"{reference}\" was found in {DatabaseListPath}.",
            nameof(reference));
    }

    // download database (it downloads automatically now, with more feedback), but still need the mitochondrial references
    string configPath = WrapperUtility.ConvertWindowsPath(Path.Combine(spritzDirectory, "Tools", "SnpEff", "snpEff.config"));
    scriptPath = WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "SnpEffDatabaseDownload.bash");
    WrapperUtility.GenerateAndRunScript(scriptPath, new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
        "echo \"\n# " + snpeffReference + "\" >> " + configPath,
        "echo \"" + snpeffReference + ".genome : Human genome " + snpeffReference.Split('.')[0] + " using RefSeq transcripts\" >> " + configPath,
        "echo \"" + snpeffReference + ".reference : ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/\" >> " + configPath,
        "echo \"\t" + snpeffReference + ".M.codonTable : Vertebrate_Mitochondrial\" >> " + configPath,
        "echo \"\t" + snpeffReference + ".MT.codonTable : Vertebrate_Mitochondrial\" >> " + configPath,
    }).WaitForExit();
}
/// <summary>
/// Runs RSeQC's inner_distance.py on a BAM file and returns the rounded mean of the inner
/// distances it reports, counting only values strictly between -250 and 250.
/// </summary>
/// <param name="spritzDirectory">Spritz directory; passed to the tools-directory command and to the BED conversion.</param>
/// <param name="analysisDirectory">Directory passed to WrapperUtility.GetAnalysisScriptPath for the generated script.</param>
/// <param name="bamPath">BAM file to analyze; the output files are written next to it using its name without extension as prefix.</param>
/// <param name="geneModelPath">Gene model; converted with BEDOPSWrapper.GffOrGtf2Bed12 when its extension is not ".bed".</param>
/// <param name="outputFiles">Receives the R plot, frequency table and distance table paths, in that order.</param>
/// <returns>The mean inner distance, rounded to the nearest integer.</returns>
/// <exception cref="InvalidOperationException">The distance table contains no usable distance.</exception>
public static int InferInnerDistance(string spritzDirectory, string analysisDirectory, string bamPath, string geneModelPath, out string[] outputFiles)
{
    if (Path.GetExtension(geneModelPath) != ".bed")
    {
        geneModelPath = BEDOPSWrapper.GffOrGtf2Bed12(spritzDirectory, analysisDirectory, geneModelPath);
    }

    // Prefix shared by every output file: the BAM path without its extension.
    string outPrefix = Path.Combine(Path.GetDirectoryName(bamPath), Path.GetFileNameWithoutExtension(bamPath));
    string distanceTablePath = outPrefix + InnerDistanceDistanceTableSuffix;
    outputFiles = new string[]
    {
        outPrefix + InnerDistanceRPlotSuffix,
        outPrefix + InnerDistanceFrequencyTableSuffix,
        distanceTablePath
    };

    WrapperUtility.GenerateAndRunScript(WrapperUtility.GetAnalysisScriptPath(analysisDirectory, "InferInnerDistance.bash"), new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
        "python RSeQC-2.6.4/scripts/inner_distance.py" +
            " -i " + WrapperUtility.ConvertWindowsPath(bamPath) + // input
            " -o " + WrapperUtility.ConvertWindowsPath(outPrefix) + // out prefix
            " -r " + WrapperUtility.ConvertWindowsPath(geneModelPath), // gene model in BED format
        WrapperUtility.EnsureClosedFileCommands(outputFiles[0]),
        WrapperUtility.EnsureClosedFileCommands(outputFiles[1]),
        WrapperUtility.EnsureClosedFileCommands(outputFiles[2]),
    }).WaitForExit();

    List<int> distances = new List<int>();
    foreach (string dline in File.ReadLines(distanceTablePath))
    {
        // The distance is read from the second tab-separated column; skip lines that lack it
        // (e.g. blank lines) rather than failing on the index.
        string[] columns = dline.Split('\t');
        if (columns.Length > 1
            && int.TryParse(columns[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out int distance)
            && distance < 250 && distance > -250) // default settings for infer_distance
        {
            distances.Add(distance);
        }
    }

    // Average() would throw the same exception type on an empty list, but with no hint about the cause.
    if (distances.Count == 0)
    {
        throw new InvalidOperationException(
            $"No inner distances between -250 and 250 were found in {distanceTablePath}.");
    }

    return (int)Math.Round(distances.Average(), 0);
}