/// <summary>
/// Reads all genes from a universal gene archive and derives two dictionaries from them
/// via <c>GetGeneIdToSymbol</c>: one built with the <c>EntrezGeneId</c> selector and one
/// built with the <c>EnsemblId</c> selector.
/// </summary>
/// <param name="inputReferencePath">
/// Path passed to <c>SequenceHelper.GetDictionaries</c> to obtain the reference-name-to-chromosome
/// dictionary. When <c>null</c>, that call is skipped and a <c>null</c> dictionary is handed to
/// the gene reader instead.
/// </param>
/// <param name="universalGeneArchivePath">Path of the universal gene archive that is opened for reading.</param>
/// <returns>
/// A tuple holding the Entrez-based dictionary first and the Ensembl-based dictionary second.
/// </returns>
public static (Dictionary<string, string> EntrezGeneIdToSymbol, Dictionary<string, string> EnsemblIdToSymbol) ParseUniversalGeneArchive(string inputReferencePath, string universalGeneArchivePath)
{
    // Stays null unless a reference path was supplied.
    IDictionary<string, IChromosome> chromosomesByName = null;
    if (inputReferencePath != null)
    {
        // Only the middle element of the returned tuple is needed here.
        (_, chromosomesByName, _) = SequenceHelper.GetDictionaries(inputReferencePath);
    }

    UgaGene[] archiveGenes;
    using (var geneReader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(universalGeneArchivePath), chromosomesByName))
    {
        archiveGenes = geneReader.GetGenes();
    }

    // Tuple elements are evaluated left to right: Entrez first, then Ensembl.
    return (
        archiveGenes.GetGeneIdToSymbol(gene => gene.EntrezGeneId),
        archiveGenes.GetGeneIdToSymbol(gene => gene.EnsemblId));
}
/// <summary>
/// Reads the intermediate transcript, regulatory, SIFT and PolyPhen files that share
/// <c>_inputPrefix</c>, together with the universal gene archive, and writes the SIFT
/// prediction cache, the PolyPhen prediction cache and the transcript cache under
/// <c>_outputCacheFilePrefix</c>. Progress is reported through a <c>ConsoleLogger</c>.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after all three caches have been written.</returns>
/// <remarks>
/// No exceptions are caught here; any failure while reading or writing propagates to the caller.
/// </remarks>
private static ExitCodes ProgramExecution()
{
    var logger = new ConsoleLogger();

    // All intermediate inputs are derived from the same prefix.
    string transcriptPath = _inputPrefix + ".transcripts.gz";
    string siftPath = _inputPrefix + ".sift.gz";
    string polyphenPath = _inputPrefix + ".polyphen.gz";
    string regulatoryPath = _inputPrefix + ".regulatory.gz";

    (var refIndexToChromosome, var refNameToChromosome, int numRefSeqs) = SequenceHelper.GetDictionaries(_inputReferencePath);

    // Every reader is opened up front and disposed together when the block exits.
    // The gene reader is the only one keyed by reference name rather than reference index.
    using (var transcriptReader = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptPath), refIndexToChromosome))
    using (var regulatoryReader = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryPath), refIndexToChromosome))
    using (var siftReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftPath), refIndexToChromosome, IntermediateIoCommon.FileType.Sift))
    using (var polyphenReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenPath), refIndexToChromosome, IntermediateIoCommon.FileType.Polyphen))
    using (var geneReader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), refNameToChromosome))
    {
        // The transcript file header supplies the metadata that is forwarded to the cache builders.
        var genomeAssembly = transcriptReader.Header.Assembly;
        var source = transcriptReader.Header.Source;
        long vepReleaseTicks = transcriptReader.Header.VepReleaseTicks;
        ushort vepVersion = transcriptReader.Header.VepVersion;

        logger.Write("- loading universal gene archive file... ");
        var genes = geneReader.GetGenes();
        var geneForest = CreateGeneForest(genes, numRefSeqs, genomeAssembly);
        logger.WriteLine($"{genes.Length:N0} loaded.");

        logger.Write("- loading regulatory region file... ");
        var regulatoryRegions = regulatoryReader.GetRegulatoryRegions();
        logger.WriteLine($"{regulatoryRegions.Length:N0} loaded.");

        logger.Write("- loading transcript file... ");
        var transcripts = transcriptReader.GetTranscripts();
        var transcriptsByRefIndex = transcripts.GetMultiValueDict(x => x.Chromosome.Index);
        logger.WriteLine($"{transcripts.Length:N0} loaded.");

        MarkCanonicalTranscripts(logger, transcripts);

        // Prediction caches are created and written before the transcript cache is built.
        var predictionBuilder = new PredictionCacheBuilder(logger, genomeAssembly);
        var predictionCaches = predictionBuilder.CreatePredictionCaches(transcriptsByRefIndex, siftReader, polyphenReader, numRefSeqs);

        logger.Write("- writing SIFT prediction cache... ");
        predictionCaches.Sift.Write(FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");

        logger.Write("- writing PolyPhen prediction cache... ");
        predictionCaches.PolyPhen.Write(FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");

        var transcriptBuilder = new TranscriptCacheBuilder(logger, genomeAssembly, source, vepReleaseTicks, vepVersion);
        var transcriptStaging = transcriptBuilder.CreateTranscriptCache(transcripts, regulatoryRegions, geneForest, numRefSeqs);

        logger.Write("- writing transcript cache... ");
        transcriptStaging.Write(FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");
    }

    return ExitCodes.Success;
}
EnsemblIdToSymbol) ParseUniversalGeneArchive() { var(_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(_inputReferencePath); UgaGene[] genes; using (var reader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(_universalGeneArchivePath), refNameToChromosome)) { genes = reader.GetGenes(); } var entrezGeneIdToSymbol = genes.GetGeneIdToSymbol(x => x.EntrezGeneId); var ensemblIdToSymbol = genes.GetGeneIdToSymbol(x => x.EnsemblId); return(entrezGeneIdToSymbol, ensemblIdToSymbol); }