Beispiel #1
0
        /// <summary>
        /// Reads the intermediate transcript, regulatory region, SIFT, PolyPhen and universal gene
        /// files, then writes the SIFT, PolyPhen and transcript caches using the output cache prefix.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> after all three caches have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var console = new ConsoleLogger();

            // all intermediate input files share the same prefix and differ only by suffix
            string transcriptFile = _inputPrefix + ".transcripts.gz";
            string siftFile       = _inputPrefix + ".sift.gz";
            string polyphenFile   = _inputPrefix + ".polyphen.gz";
            string regulatoryFile = _inputPrefix + ".regulatory.gz";

            (var chromosomesByIndex, var chromosomesByName, int refSeqCount) = SequenceHelper.GetDictionaries(_inputReferencePath);

            // the readers are opened in this order and disposed in reverse order when the block exits
            using (var transcriptInput = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptFile), chromosomesByIndex))
            using (var regulatoryInput = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryFile), chromosomesByIndex))
            using (var siftInput = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftFile), chromosomesByIndex, IntermediateIoCommon.FileType.Sift))
            using (var polyphenInput = new PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenFile), chromosomesByIndex, IntermediateIoCommon.FileType.Polyphen))
            using (var geneInput = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), chromosomesByName))
            {
                // metadata taken from the transcript file header is reused for the output caches
                var    assembly     = transcriptInput.Header.Assembly;
                var    dataSource   = transcriptInput.Header.Source;
                long   releaseTicks = transcriptInput.Header.VepReleaseTicks;
                ushort release      = transcriptInput.Header.VepVersion;

                console.Write("- loading universal gene archive file... ");
                var allGenes   = geneInput.GetGenes();
                var geneLookup = CreateGeneForest(allGenes, refSeqCount, assembly);
                console.WriteLine($"{allGenes.Length:N0} loaded.");

                console.Write("- loading regulatory region file... ");
                var regions = regulatoryInput.GetRegulatoryRegions();
                console.WriteLine($"{regions.Length:N0} loaded.");

                console.Write("- loading transcript file... ");
                var allTranscripts   = transcriptInput.GetTranscripts();
                var transcriptsByRef = allTranscripts.GetMultiValueDict(transcript => transcript.Chromosome.Index);
                console.WriteLine($"{allTranscripts.Length:N0} loaded.");

                MarkCanonicalTranscripts(console, allTranscripts);

                // prediction caches (SIFT & PolyPhen)
                var predictionCacheBuilder = new PredictionCacheBuilder(console, assembly);
                var predictionStaging      = predictionCacheBuilder.CreatePredictionCaches(transcriptsByRef, siftInput, polyphenInput, refSeqCount);

                console.Write("- writing SIFT prediction cache... ");
                var siftOutput = FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix));
                predictionStaging.Sift.Write(siftOutput);
                console.WriteLine("finished.");

                console.Write("- writing PolyPhen prediction cache... ");
                var polyPhenOutput = FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix));
                predictionStaging.PolyPhen.Write(polyPhenOutput);
                console.WriteLine("finished.");

                // transcript cache
                var transcriptCacheBuilder = new TranscriptCacheBuilder(console, assembly, dataSource, releaseTicks, release);
                var transcriptCacheStaging = transcriptCacheBuilder.CreateTranscriptCache(allTranscripts, regions, geneLookup, refSeqCount);

                console.Write("- writing transcript cache... ");
                var transcriptOutput = FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix));
                transcriptCacheStaging.Write(transcriptOutput);
                console.WriteLine("finished.");
            }

            return ExitCodes.Success;
        }
        /// <summary>
        /// Builds the SIFT and PolyPhen prediction cache staging objects. For every reference index
        /// one prediction record is read from each reader, and the predictions belonging to the
        /// transcripts on that reference are converted via <c>ProcessReference</c>.
        /// </summary>
        /// <param name="transcriptsByRefIndex">Transcripts grouped by reference (chromosome) index.</param>
        /// <param name="siftReader">Source of SIFT prediction data; read exactly once per reference index.</param>
        /// <param name="polyphenReader">Source of PolyPhen prediction data; read exactly once per reference index.</param>
        /// <param name="numRefSeqs">Number of reference sequences to process; must not exceed <see cref="ushort.MaxValue"/>.</param>
        /// <returns>The SIFT and PolyPhen staging objects, in that order.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="numRefSeqs"/> is larger than <see cref="ushort.MaxValue"/>.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// The chromosome index of a SIFT or PolyPhen record does not match the reference index being processed.
        /// </exception>
        public (PredictionCacheStaging Sift, PredictionCacheStaging PolyPhen) CreatePredictionCaches(
            Dictionary<ushort, List<MutableTranscript>> transcriptsByRefIndex, PredictionReader siftReader,
            PredictionReader polyphenReader, int numRefSeqs)
        {
            // The loop counter below is a ushort: a larger bound would make refIndex++ wrap around
            // to 0 and the loop condition could never become false.
            if (numRefSeqs > ushort.MaxValue)
            {
                throw new System.ArgumentOutOfRangeException(nameof(numRefSeqs), numRefSeqs,
                    $"The number of reference sequences cannot exceed {ushort.MaxValue}.");
            }

            _logger.Write("- converting prediction strings... ");

            // entries stay null for references that have no transcripts
            var siftRoundedPredictionsPerRef     = new RoundedEntryPrediction[numRefSeqs][];
            var polyPhenRoundedPredictionsPerRef = new RoundedEntryPrediction[numRefSeqs][];

            for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)
            {
                // Both readers are advanced on every iteration, even when the reference has no
                // transcripts, so that the records stay in step with refIndex.
                var sift     = siftReader.GetPredictionData();
                var polyphen = polyphenReader.GetPredictionData();

                if (sift.Chromosome.Index != refIndex || polyphen.Chromosome.Index != refIndex)
                {
                    throw new InvalidDataException(
                        $"Found mismatch between transcript chromosome index ({refIndex}) and prediction chromosome indices (SIFT: {sift.Chromosome.Index}, PolyPhen: {polyphen.Chromosome.Index}).");
                }

                if (!transcriptsByRefIndex.TryGetValue(refIndex, out var refTranscripts))
                {
                    continue;
                }

                var (siftPredictions, polyPhenPredictions) = ProcessReference(refTranscripts,
                    sift.TranscriptToPredictionIndex, polyphen.TranscriptToPredictionIndex,
                    sift.PredictionData, polyphen.PredictionData);

                siftRoundedPredictionsPerRef[refIndex]     = siftPredictions;
                polyPhenRoundedPredictionsPerRef[refIndex] = polyPhenPredictions;
            }

            _logger.WriteLine("finished.");

            var siftStaging     = BuildCacheStaging("SIFT", siftRoundedPredictionsPerRef, numRefSeqs);
            var polyPhenStaging = BuildCacheStaging("PolyPhen", polyPhenRoundedPredictionsPerRef, numRefSeqs);

            return (siftStaging, polyPhenStaging);
        }