Beispiel #1
0
        /// <summary>
        /// Reads every gene from the universal gene archive and derives two gene ID to symbol
        /// lookups from them: one keyed on the Entrez gene ID, the other on the Ensembl ID.
        /// </summary>
        /// <param name="inputReferencePath">path passed to SequenceHelper.GetDictionaries; when null, no dictionaries are loaded and the gene reader is given a null chromosome dictionary</param>
        /// <param name="universalGeneArchivePath">path of the universal gene archive that is opened for reading</param>
        /// <returns>the lookup built from EntrezGeneId, followed by the lookup built from EnsemblId</returns>
        public static (Dictionary <string, string> EntrezGeneIdToSymbol, Dictionary <string, string> EnsemblIdToSymbol) ParseUniversalGeneArchive(string inputReferencePath, string universalGeneArchivePath)
        {
            // stays null when no reference path was supplied
            IDictionary<string, IChromosome> chromosomesByName = null;

            if (inputReferencePath != null)
            {
                (_, chromosomesByName, _) = SequenceHelper.GetDictionaries(inputReferencePath);
            }

            UgaGene[] loadedGenes;

            // the reader is disposed before the lookups are built
            using (var geneReader = new UgaGeneReader(
                       GZipUtilities.GetAppropriateReadStream(universalGeneArchivePath),
                       chromosomesByName))
            {
                loadedGenes = geneReader.GetGenes();
            }

            return (
                loadedGenes.GetGeneIdToSymbol(gene => gene.EntrezGeneId),
                loadedGenes.GetGeneIdToSymbol(gene => gene.EnsemblId));
        }
Beispiel #2
0
        /// <summary>
        /// Loads the intermediate transcript, regulatory region, SIFT, PolyPhen and universal gene
        /// files, then writes the SIFT prediction cache, the PolyPhen prediction cache and the
        /// transcript cache using the output cache file prefix.
        /// </summary>
        /// <returns>ExitCodes.Success once all three caches have been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var log = new ConsoleLogger();

            // intermediate input files all share the same prefix
            string transcriptFile = $"{_inputPrefix}.transcripts.gz";
            string siftFile       = $"{_inputPrefix}.sift.gz";
            string polyphenFile   = $"{_inputPrefix}.polyphen.gz";
            string regulatoryFile = $"{_inputPrefix}.regulatory.gz";

            (var chromosomesByIndex, var chromosomesByName, int numRefSeqs) = SequenceHelper.GetDictionaries(_inputReferencePath);

            // readers are opened in this order and disposed in reverse order when the block exits
            using (var transcriptReader = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptFile), chromosomesByIndex))
            using (var regulatoryReader = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryFile), chromosomesByIndex))
            using (var siftReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftFile), chromosomesByIndex, IntermediateIoCommon.FileType.Sift))
            using (var polyphenReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenFile), chromosomesByIndex, IntermediateIoCommon.FileType.Polyphen))
            using (var geneReader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), chromosomesByName))
            {
                // metadata taken from the transcript file header
                var    assembly     = transcriptReader.Header.Assembly;
                var    origin       = transcriptReader.Header.Source;
                long   releaseTicks = transcriptReader.Header.VepReleaseTicks;
                ushort releaseVer   = transcriptReader.Header.VepVersion;

                // genes
                log.Write("- loading universal gene archive file... ");
                var ugaGenes = geneReader.GetGenes();
                var forest   = CreateGeneForest(ugaGenes, numRefSeqs, assembly);
                log.WriteLine($"{ugaGenes.Length:N0} loaded.");

                // regulatory regions
                log.Write("- loading regulatory region file... ");
                var regions = regulatoryReader.GetRegulatoryRegions();
                log.WriteLine($"{regions.Length:N0} loaded.");

                // transcripts, additionally grouped by chromosome index
                log.Write("- loading transcript file... ");
                var allTranscripts = transcriptReader.GetTranscripts();
                var byRefIndex     = allTranscripts.GetMultiValueDict(transcript => transcript.Chromosome.Index);
                log.WriteLine($"{allTranscripts.Length:N0} loaded.");

                MarkCanonicalTranscripts(log, allTranscripts);

                // prediction caches
                var predictions = new PredictionCacheBuilder(log, assembly)
                    .CreatePredictionCaches(byRefIndex, siftReader, polyphenReader, numRefSeqs);

                log.Write("- writing SIFT prediction cache... ");
                predictions.Sift.Write(FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix)));
                log.WriteLine("finished.");

                log.Write("- writing PolyPhen prediction cache... ");
                predictions.PolyPhen.Write(FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix)));
                log.WriteLine("finished.");

                // transcript cache
                var staging = new TranscriptCacheBuilder(log, assembly, origin, releaseTicks, releaseVer)
                    .CreateTranscriptCache(allTranscripts, regions, forest, numRefSeqs);

                log.Write("- writing transcript cache... ");
                staging.Write(FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix)));
                log.WriteLine("finished.");
            }

            return ExitCodes.Success;
        }
Beispiel #3
0
                        EnsemblIdToSymbol) ParseUniversalGeneArchive()
        {
            var(_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(_inputReferencePath);

            UgaGene[] genes;

            using (var reader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(_universalGeneArchivePath),
                                                  refNameToChromosome))
            {
                genes = reader.GetGenes();
            }

            var entrezGeneIdToSymbol = genes.GetGeneIdToSymbol(x => x.EntrezGeneId);
            var ensemblIdToSymbol    = genes.GetGeneIdToSymbol(x => x.EnsemblId);

            return(entrezGeneIdToSymbol, ensemblIdToSymbol);
        }