/// <summary>
/// Builds the SIFT, PolyPhen and transcript caches from the intermediate files that share
/// <c>_inputPrefix</c> (plus the universal gene archive) and writes them to the paths that
/// <c>CacheConstants</c> derives from <c>_outputCacheFilePrefix</c>.
/// </summary>
/// <returns><see cref="ExitCodes.Success"/> once all three caches have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var logger = new ConsoleLogger();

    // All intermediate inputs share the same prefix and differ only by suffix.
    string transcriptPath = _inputPrefix + ".transcripts.gz";
    string siftPath       = _inputPrefix + ".sift.gz";
    string polyphenPath   = _inputPrefix + ".polyphen.gz";
    string regulatoryPath = _inputPrefix + ".regulatory.gz";

    (var refIndexToChromosome, var refNameToChromosome, int numRefSeqs) =
        SequenceHelper.GetDictionaries(_inputReferencePath);

    using (var transcriptReader = new MutableTranscriptReader(
        GZipUtilities.GetAppropriateReadStream(transcriptPath), refIndexToChromosome))
    using (var regulatoryReader = new RegulatoryRegionReader(
        GZipUtilities.GetAppropriateReadStream(regulatoryPath), refIndexToChromosome))
    using (var siftReader = new PredictionReader(
        GZipUtilities.GetAppropriateReadStream(siftPath), refIndexToChromosome,
        IntermediateIoCommon.FileType.Sift))
    using (var polyphenReader = new PredictionReader(
        GZipUtilities.GetAppropriateReadStream(polyphenPath), refIndexToChromosome,
        IntermediateIoCommon.FileType.Polyphen))
    using (var geneReader = new UgaGeneReader(
        GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath),
        refNameToChromosome))
    {
        // The transcript file header supplies the metadata used by both cache builders.
        var genomeAssembly   = transcriptReader.Header.Assembly;
        var source           = transcriptReader.Header.Source;
        long vepReleaseTicks = transcriptReader.Header.VepReleaseTicks;
        ushort vepVersion    = transcriptReader.Header.VepVersion;

        logger.Write("- loading universal gene archive file... ");
        var genes      = geneReader.GetGenes();
        var geneForest = CreateGeneForest(genes, numRefSeqs, genomeAssembly);
        logger.WriteLine($"{genes.Length:N0} loaded.");

        logger.Write("- loading regulatory region file... ");
        var regulatoryRegions = regulatoryReader.GetRegulatoryRegions();
        logger.WriteLine($"{regulatoryRegions.Length:N0} loaded.");

        // The literal below must stay on one line: a regular C# string literal cannot
        // contain a raw line break.
        logger.Write("- loading transcript file... ");
        var transcripts           = transcriptReader.GetTranscripts();
        var transcriptsByRefIndex = transcripts.GetMultiValueDict(x => x.Chromosome.Index);
        logger.WriteLine($"{transcripts.Length:N0} loaded.");

        MarkCanonicalTranscripts(logger, transcripts);

        var predictionBuilder = new PredictionCacheBuilder(logger, genomeAssembly);
        var predictionCaches  = predictionBuilder.CreatePredictionCaches(
            transcriptsByRefIndex, siftReader, polyphenReader, numRefSeqs);

        // NOTE(review): the streams returned by GetCreateStream are not disposed in this
        // method; presumably each Write() takes ownership and closes its stream - confirm.
        logger.Write("- writing SIFT prediction cache... ");
        predictionCaches.Sift.Write(
            FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");

        logger.Write("- writing PolyPhen prediction cache... ");
        predictionCaches.PolyPhen.Write(
            FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");

        var transcriptBuilder = new TranscriptCacheBuilder(
            logger, genomeAssembly, source, vepReleaseTicks, vepVersion);
        var transcriptStaging = transcriptBuilder.CreateTranscriptCache(
            transcripts, regulatoryRegions, geneForest, numRefSeqs);

        logger.Write("- writing transcript cache... ");
        transcriptStaging.Write(
            FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");
    }

    return ExitCodes.Success;
}
/// <summary>
/// Walks the reference sequences in index order, reading one block of prediction data from
/// each reader per reference, and converts the predictions for the transcripts on that
/// reference via <c>ProcessReference</c>. The per-reference results are then handed to
/// <c>BuildCacheStaging</c> to produce the SIFT and PolyPhen staging objects.
/// </summary>
/// <param name="transcriptsByRefIndex">Transcripts grouped by reference sequence index.</param>
/// <param name="siftReader">Reader supplying SIFT prediction data, one block per reference.</param>
/// <param name="polyphenReader">Reader supplying PolyPhen prediction data, one block per reference.</param>
/// <param name="numRefSeqs">Number of reference sequences to iterate over.</param>
/// <returns>The SIFT and PolyPhen cache staging objects.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the chromosome index of either prediction block differs from the reference
/// index currently being processed.
/// </exception>
public (PredictionCacheStaging Sift, PredictionCacheStaging PolyPhen) CreatePredictionCaches(
    Dictionary<ushort, List<MutableTranscript>> transcriptsByRefIndex,
    PredictionReader siftReader,
    PredictionReader polyphenReader,
    int numRefSeqs)
{
    _logger.Write("- converting prediction strings... ");

    var siftRoundedPredictionsPerRef     = new RoundedEntryPrediction[numRefSeqs][];
    var polyPhenRoundedPredictionsPerRef = new RoundedEntryPrediction[numRefSeqs][];

    // An int counter is used deliberately: a ushort counter compared against an int bound
    // wraps back to 0 instead of terminating once numRefSeqs exceeds ushort.MaxValue.
    for (int refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        // Both readers are advanced on every iteration, even when the reference has no
        // transcripts, so that they stay in step with refIndex.
        var sift     = siftReader.GetPredictionData();
        var polyphen = polyphenReader.GetPredictionData();

        if (sift.Chromosome.Index != refIndex || polyphen.Chromosome.Index != refIndex)
        {
            throw new InvalidDataException(
                $"Found mismatch between transcript chromosome index ({refIndex}) and prediction chromosome indices (SIFT: {sift.Chromosome.Index}, PolyPhen: {polyphen.Chromosome.Index}).");
        }

        // References without transcripts keep a null entry in both per-reference arrays.
        if (!transcriptsByRefIndex.TryGetValue((ushort)refIndex, out var refTranscripts))
        {
            continue;
        }

        var (siftPredictions, polyPhenPredictions) = ProcessReference(
            refTranscripts,
            sift.TranscriptToPredictionIndex,
            polyphen.TranscriptToPredictionIndex,
            sift.PredictionData,
            polyphen.PredictionData);

        siftRoundedPredictionsPerRef[refIndex]     = siftPredictions;
        polyPhenRoundedPredictionsPerRef[refIndex] = polyPhenPredictions;
    }

    _logger.WriteLine("finished.");

    var siftStaging     = BuildCacheStaging("SIFT", siftRoundedPredictionsPerRef, numRefSeqs);
    var polyPhenStaging = BuildCacheStaging("PolyPhen", polyPhenRoundedPredictionsPerRef, numRefSeqs);

    return (siftStaging, polyPhenStaging);
}