/// <summary>
/// Entry point of the tool: builds a <c>FastaToCompressedConverter</c> and hands it the
/// configured FASTA, cytoband and chromosome-name input paths together with the output
/// path and the genome assembly parsed from the configuration.
/// </summary>
protected override void ProgramExecution()
{
    var fastaConverter = new FastaToCompressedConverter();

    // the assembly is supplied as text in the settings and parsed by GenomeAssemblyUtilities
    var assembly = GenomeAssemblyUtilities.Convert(ConfigurationSettings.GenomeAssembly);

    var fastaPath           = ConfigurationSettings.InputFastaPath;
    var cytobandPath        = ConfigurationSettings.InputCytobandPath;
    var chromosomeNamesPath = ConfigurationSettings.InputChromosomeNamesPath;
    var compressedPath      = ConfigurationSettings.OutputCompressedPath;

    fastaConverter.Convert(fastaPath, cytobandPath, chromosomeNamesPath, compressedPath, assembly);
}
/// <summary>
/// Verifies that <c>GenomeAssemblyUtilities.Convert</c> maps assembly names to the
/// expected <c>GenomeAssembly</c> value regardless of letter casing, and that an
/// unrecognized name causes an exception.
/// </summary>
public void GenomeAssemblyConvertTest()
{
    // mixed-case inputs are used on purpose: the conversion is expected to ignore casing
    Assert.Equal(GenomeAssembly.GRCh37, GenomeAssemblyUtilities.Convert("grCh37"));
    Assert.Equal(GenomeAssembly.GRCh38, GenomeAssemblyUtilities.Convert("GrCh38"));
    Assert.Equal(GenomeAssembly.hg19, GenomeAssemblyUtilities.Convert("HG19"));

    // an unknown assembly name must throw; any exception type is accepted, matching the
    // original catch (Exception) check. A statement-bodied lambda is used so the call
    // binds to the Action overload unambiguously.
    Assert.ThrowsAny<Exception>(() => { GenomeAssemblyUtilities.Convert("asldkjf"); });
}
/// <summary>
/// Entry point of the tool: parses every VEP directory (in the order returned by
/// <c>ReferenceIndex.GetUcscKaryotypeOrder</c>) and writes the pre-cache files
/// (transcripts, regulatory, genes, introns, exons, miRNAs, SIFT, PolyPhen, cDNAs and
/// peptides) next to the configured output stub. Afterwards the SIFT and PolyPhen
/// predictions are passed through <c>PredictionConverter</c>.
/// </summary>
protected override void ProgramExecution()
{
    var dataSource = ConfigurationSettings.ImportRefSeqTranscripts
        ? TranscriptDataSource.RefSeq
        : TranscriptDataSource.Ensembl;

    var references     = new ReferenceIndex(ConfigurationSettings.InputReferencePath);
    var orderedVepDirs = references.GetUcscKaryotypeOrder(ConfigurationSettings.InputVepDirectory);
    var cacheParser    = new VepCacheParser(dataSource);
    var assembly       = GenomeAssemblyUtilities.Convert(ConfigurationSettings.GenomeAssembly);

    // =========================
    // create the pre-cache file
    // =========================

    int processedCount = 0;

    // every output file shares the same stub and differs only by its suffix
    var stub = ConfigurationSettings.OutputStub;

    var transcriptPath = stub + ".transcripts.gz";
    var regulatoryPath = stub + ".regulatory.gz";
    var genePath       = stub + ".genes.gz";
    var intronPath     = stub + ".introns.gz";
    var exonPath       = stub + ".exons.gz";
    var mirnaPath      = stub + ".mirnas.gz";
    var siftPath       = stub + ".sift.dat";
    var polyphenPath   = stub + ".polyphen.dat";
    var cdnaPath       = stub + ".cdnas.gz";
    var peptidePath    = stub + ".peptides.gz";

    // the SIFT and PolyPhen writers target "<path>.tmp"; the un-suffixed paths are what is
    // later handed to PredictionConverter
    using (var transcriptOut = GZipUtilities.GetStreamWriter(transcriptPath))
    using (var regulatoryOut = GZipUtilities.GetStreamWriter(regulatoryPath))
    using (var geneOut       = GZipUtilities.GetStreamWriter(genePath))
    using (var intronOut     = GZipUtilities.GetStreamWriter(intronPath))
    using (var exonOut       = GZipUtilities.GetStreamWriter(exonPath))
    using (var mirnaOut      = GZipUtilities.GetStreamWriter(mirnaPath))
    using (var siftOut       = GZipUtilities.GetBinaryWriter(siftPath + ".tmp"))
    using (var polyphenOut   = GZipUtilities.GetBinaryWriter(polyphenPath + ".tmp"))
    using (var cdnaOut       = GZipUtilities.GetStreamWriter(cdnaPath))
    using (var peptideOut    = GZipUtilities.GetStreamWriter(peptidePath))
    {
        // force "\n" line endings on all text outputs, independent of the platform default
        var textOutputs = new[]
        {
            transcriptOut, regulatoryOut, geneOut, intronOut,
            exonOut, mirnaOut, cdnaOut, peptideOut
        };

        foreach (var textOutput in textOutputs)
        {
            textOutput.NewLine = "\n";
        }

        // each output starts with a header describing its file type, source and assembly
        WriteHeader(transcriptOut, GlobalImportCommon.FileType.Transcript, dataSource, assembly);
        WriteHeader(regulatoryOut, GlobalImportCommon.FileType.Regulatory, dataSource, assembly);
        WriteHeader(geneOut,       GlobalImportCommon.FileType.Gene,       dataSource, assembly);
        WriteHeader(intronOut,     GlobalImportCommon.FileType.Intron,     dataSource, assembly);
        WriteHeader(exonOut,       GlobalImportCommon.FileType.Exon,       dataSource, assembly);
        WriteHeader(mirnaOut,      GlobalImportCommon.FileType.MicroRna,   dataSource, assembly);
        WriteHeader(siftOut,       GlobalImportCommon.FileType.Sift,       dataSource, assembly);
        WriteHeader(polyphenOut,   GlobalImportCommon.FileType.PolyPhen,   dataSource, assembly);
        WriteHeader(cdnaOut,       GlobalImportCommon.FileType.CDna,       dataSource, assembly);
        WriteHeader(peptideOut,    GlobalImportCommon.FileType.Peptide,    dataSource, assembly);

        // process each VEP directory; Item1 is the reference name, Item2 the directory
        foreach (var vepEntry in orderedVepDirs)
        {
            Console.WriteLine("Parsing reference sequence [{0}]:", vepEntry.Item1);
            processedCount++;

            var referenceSlot = references.GetIndex(vepEntry.Item1);

            cacheParser.ParseDumpDirectory(
                referenceSlot,
                vepEntry.Item2,
                transcriptOut,
                regulatoryOut,
                geneOut,
                intronOut,
                exonOut,
                mirnaOut,
                siftOut,
                polyphenOut,
                cdnaOut,
                peptideOut);
        }
    }

    Console.WriteLine("\n{0} directories processed.", processedCount);
    cacheParser.DumpStatistics();
    Console.WriteLine();

    // convert our protein function predictions
    var predictions = new PredictionConverter(references.NumReferenceSeqs);
    predictions.Convert(siftPath, "SIFT", GlobalImportCommon.FileType.Sift);
    predictions.Convert(polyphenPath, "PolyPhen", GlobalImportCommon.FileType.PolyPhen);
}
/// <summary>
/// Entry point of the tool: announces the configured wigFix input file, runs
/// <c>PhylopWriter.ExtractPhylopScores</c> against it and the configured output
/// directory, and finally prints the elapsed time reported by <c>Benchmark</c>.
/// </summary>
protected override void ProgramExecution()
{
    Console.WriteLine("Reading file: {0}", ConfigurationSettings.InputWigFixFile);

    var stopwatch   = new Benchmark();
    var dataVersion = GetDataVersion();
    var assembly    = GenomeAssemblyUtilities.Convert(ConfigurationSettings.GenomeAssembly);

    // the writer is disposed as soon as extraction completes
    using (var phylopWriter = new PhylopWriter(
        ConfigurationSettings.InputWigFixFile,
        dataVersion,
        assembly,
        ConfigurationSettings.OutputNirvanaDirectory))
    {
        phylopWriter.ExtractPhylopScores();
    }

    Console.WriteLine("Time:{0}", stopwatch.GetElapsedTime());
}