/// <summary>
/// Builds the transcript annotation provider from the cache files that share <paramref name="pathPrefix"/>.
/// The transcript cache is loaded eagerly; the SIFT and PolyPhen streams are kept in fields together
/// with the prediction readers created on top of them.
/// </summary>
/// <param name="pathPrefix">prefix handed to <c>CacheConstants</c> to derive the transcript, SIFT and PolyPhen paths</param>
/// <param name="sequenceProvider">supplies the sequence, the chromosome dictionaries and the assembly</param>
/// <param name="conservationProvider">optional; when not null its version is appended to <c>DataSourceVersions</c></param>
public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider, ProteinConservationProvider conservationProvider)
        {
            Name                  = "Transcript annotation provider";
            _sequence             = sequenceProvider.Sequence;
            _refNameToChromosome  = sequenceProvider.RefNameToChromosome;
            _conservationProvider = conservationProvider;

            // the transcript stream is disposed as soon as the cache has been initiated
            var transcriptPath = CacheConstants.TranscriptPath(pathPrefix);
            using (var transcriptStream = PersistentStreamUtils.GetReadStream(transcriptPath))
            {
                (_transcriptCache, TranscriptIntervalArrays, VepVersion) =
                    InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly);
            }

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // TODO: this is not great. We should not be using IEnumerables if we have to resort to strange stuff like this
            if (conservationProvider != null)
            {
                var conservationVersion = new[] { conservationProvider.Version };
                DataSourceVersions = DataSourceVersions.Concat(conservationVersion);
            }

            // SIFT: the stream is stored in a field alongside its reader
            var siftPath = CacheConstants.SiftPath(pathPrefix);
            _siftStream  = PersistentStreamUtils.GetReadStream(siftPath);
            _siftReader  = new PredictionCacheReader(_siftStream, PredictionCacheReader.SiftDescriptions);

            // PolyPhen: same arrangement as SIFT
            var polyphenPath = CacheConstants.PolyPhenPath(pathPrefix);
            _polyphenStream  = PersistentStreamUtils.GetReadStream(polyphenPath);
            _polyphenReader  = new PredictionCacheReader(_polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
Esempio n. 2
0
        /// <summary>
        /// Reads two prediction cache files side by side and merges them one reference sequence at a time.
        /// Also records the genome assembly and the number of reference sequences of the first file
        /// in <c>_genomeAssembly</c> and <c>_numRefSeq</c>.
        /// </summary>
        /// <param name="path1">path of the first prediction cache (drives the iteration count)</param>
        /// <param name="path2">path of the second prediction cache</param>
        /// <returns>one merged cache per reference index of the first file</returns>
        /// <exception cref="UserErrorException">the two files report different genome assemblies</exception>
        /// <exception cref="DataMisalignedException">exactly one of the two caches is empty for a reference index</exception>
        private List <PredictionCache> GetMergedPredictions(string path1, string path2)
        {
            var merged = new List<PredictionCache>();

            using (var firstReader = new PredictionCacheReader(FileUtilities.GetReadStream(path1)))
            using (var secondReader = new PredictionCacheReader(FileUtilities.GetReadStream(path2)))
            {
                var firstHeader  = firstReader.FileHeader;
                var secondHeader = secondReader.FileHeader;

                _genomeAssembly = firstHeader.GenomeAssembly;
                _numRefSeq      = firstHeader.Index.Size;

                if (_genomeAssembly != secondHeader.GenomeAssembly)
                {
                    throw new UserErrorException($"Observed different genome assemblies: {firstHeader.GenomeAssembly}, {secondHeader.GenomeAssembly}");
                }

                for (ushort refIndex = 0; refIndex < _numRefSeq; refIndex++)
                {
                    var firstCache  = firstReader.Read(refIndex);
                    var secondCache = secondReader.Read(refIndex);

                    // exactly one side being empty means the two files are not aligned
                    bool firstIsEmpty  = firstCache == PredictionCache.Empty;
                    bool secondIsEmpty = secondCache == PredictionCache.Empty;

                    if (firstIsEmpty != secondIsEmpty)
                    {
                        throw new DataMisalignedException("one of the cache ran out before the other");
                    }

                    merged.Add(firstCache.GetMergedCache(secondCache));
                }
                //todo: take care of ref sequences unique to one cache
            }

            return merged;
        }
        /// <summary>
        /// Combines two cache sets (transcripts, SIFT and PolyPhen predictions) identified by
        /// <c>_inputPrefix</c> and <c>_inputPrefix2</c> and writes the combined result under <c>_outputPrefix</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> when all outputs have been written</returns>
        /// <exception cref="InvalidDataException">the two transcript caches hold a different number of reference sequences</exception>
        private static ExitCodes ProgramExecution()
        {
            var sequenceData = SequenceHelper.GetDictionaries(_refSequencePath);
            var logger       = new ConsoleLogger();

            var caches = LoadTranscriptCaches(
                logger,
                CacheConstants.TranscriptPath(_inputPrefix),
                CacheConstants.TranscriptPath(_inputPrefix2),
                sequenceData.refIndexToChromosome);

            var intervalArrays1 = caches.Cache.TranscriptIntervalArrays;
            var intervalArrays2 = caches.Cache2.TranscriptIntervalArrays;

            if (intervalArrays1.Length != intervalArrays2.Length)
            {
                throw new InvalidDataException($"Expected the number of reference sequences in cache 1 ({intervalArrays1.Length}) and cache 2 ({intervalArrays2.Length}) to be the same.");
            }

            int numRefSeqs                = intervalArrays1.Length;
            var combinedIntervalArrays    = new IntervalArray<ITranscript>[numRefSeqs];
            var siftPredictionsPerRef     = new Prediction[numRefSeqs][];
            var polyphenPredictionsPerRef = new Prediction[numRefSeqs][];

            // the output headers are taken from the first SIFT / PolyPhen reader and outlive the readers
            PredictionHeader siftHeader;
            PredictionHeader polyphenHeader;

            using (var siftReader      = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix)), PredictionCacheReader.SiftDescriptions))
            using (var siftReader2     = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix2)), PredictionCacheReader.SiftDescriptions))
            using (var polyphenReader  = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix)), PredictionCacheReader.PolyphenDescriptions))
            using (var polyphenReader2 = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix2)), PredictionCacheReader.PolyphenDescriptions))
            {
                siftHeader     = siftReader.Header;
                polyphenHeader = polyphenReader.Header;

                for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)
                {
                    var chromosome = sequenceData.refIndexToChromosome[refIndex];

                    // highlight the chromosome banner on the console
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    logger.WriteLine($"\n{chromosome.UcscName}:");
                    Console.ResetColor();

                    var (siftPredictions, siftOffset) = CombinePredictions(logger, chromosome, "SIFT", siftReader, siftReader2);
                    siftPredictionsPerRef[refIndex] = siftPredictions;

                    var (polyphenPredictions, polyphenOffset) = CombinePredictions(logger, chromosome, "PolyPhen", polyphenReader, polyphenReader2);
                    polyphenPredictionsPerRef[refIndex] = polyphenPredictions;

                    // the offsets (prediction counts of the first cache) are passed on to the transcript merge
                    combinedIntervalArrays[refIndex] = CombineTranscripts(
                        logger,
                        intervalArrays1[refIndex],
                        intervalArrays2[refIndex],
                        siftOffset,
                        polyphenOffset);
                }
            }

            logger.WriteLine();
            WritePredictions(logger, "SIFT", CacheConstants.SiftPath(_outputPrefix), siftHeader, siftPredictionsPerRef);
            WritePredictions(logger, "PolyPhen", CacheConstants.PolyPhenPath(_outputPrefix), polyphenHeader, polyphenPredictionsPerRef);

            var outputHeader = CloneHeader(caches.Cache.Header);
            WriteTranscripts(logger, outputHeader, combinedIntervalArrays, caches.Cache.RegulatoryRegionIntervalArrays);

            return ExitCodes.Success;
        }
Esempio n. 4
0
 /// <summary>
 /// Stores the already-opened readers and the loaded transcript cache objects in the bundle's properties.
 /// The constructor performs no I/O and no validation of its own.
 /// </summary>
 /// <param name="sequenceReader">reader for the compressed reference sequence</param>
 /// <param name="siftReader">reader for the SIFT prediction cache</param>
 /// <param name="polyPhenReader">reader for the PolyPhen prediction cache</param>
 /// <param name="cacheData">transcript cache data</param>
 /// <param name="transcriptCache">transcript cache</param>
 /// <param name="source">transcript source</param>
 private DataBundle(
     CompressedSequenceReader sequenceReader,
     PredictionCacheReader siftReader,
     PredictionCacheReader polyPhenReader,
     VC.TranscriptCacheData cacheData,
     VC.TranscriptCache transcriptCache,
     Source source)
 {
     // readers
     SequenceReader = sequenceReader;
     SiftReader     = siftReader;
     PolyPhenReader = polyPhenReader;

     // transcript cache contents
     TranscriptCacheData = cacheData;
     TranscriptCache     = transcriptCache;
     Source              = source;
 }
        /// <summary>
        /// Collects the predictions referenced by the given transcripts and wraps them in a
        /// <c>PredictionCacheStaging</c> object that uses the header of <paramref name="reader"/>.
        /// Progress is reported through <c>Logger</c>.
        /// </summary>
        /// <param name="description">label used in the log messages</param>
        /// <param name="transcripts">transcripts whose prediction indices are collected</param>
        /// <param name="chromosome">chromosome being processed; its <c>Index</c> selects the returned prediction array</param>
        /// <param name="oldPredictions">predictions passed on to <c>GetPredictions</c></param>
        /// <param name="reader">prediction reader that supplies the header for the staging object</param>
        /// <param name="indexFunc">maps a transcript to its prediction index</param>
        /// <param name="numRefSeqs">number of reference sequences, passed on to <c>GetPredictions</c></param>
        /// <returns>the staging object and the prediction array stored at <c>chromosome.Index</c></returns>
        private static (PredictionCacheStaging Staging, Prediction[] Predictions) GetPredictionStaging(
            string description, IEnumerable <ITranscript> transcripts, IChromosome chromosome, IReadOnlyList <Prediction> oldPredictions,
            PredictionCacheReader reader, Func <ITranscript, int> indexFunc, int numRefSeqs)
        {
            Logger.Write($"- retrieving {description} predictions... ");

            var indexSet          = GetUniqueIndices(transcripts, indexFunc);
            var predictionsPerRef = GetPredictions(indexSet, chromosome, numRefSeqs, oldPredictions);
            var staging           = new PredictionCacheStaging(reader.Header, predictionsPerRef);

            Logger.WriteLine($"found {indexSet.Count} predictions.");
            return(staging, predictionsPerRef[chromosome.Index]);
        }
        /// <summary>
        /// Builds the transcript annotation provider from the cache files that share <paramref name="pathPrefix"/>:
        /// initiates the transcript cache and creates the SIFT and PolyPhen prediction readers.
        /// </summary>
        /// <param name="pathPrefix">prefix handed to <c>CacheConstants</c> to derive the transcript, SIFT and PolyPhen paths</param>
        /// <param name="sequenceProvider">supplies the sequence, the reference index dictionary and the genome assembly</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)
        {
            Name      = "Transcript annotation provider";
            _sequence = sequenceProvider.Sequence;

            // transcript cache
            var transcriptStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(pathPrefix));
            (_transcriptCache, VepVersion) = InitiateCache(
                transcriptStream,
                sequenceProvider.RefIndexToChromosome,
                sequenceProvider.GenomeAssembly);

            GenomeAssembly     = _transcriptCache.GenomeAssembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // prediction caches
            var siftStream = FileUtilities.GetReadStream(CacheConstants.SiftPath(pathPrefix));
            _siftReader    = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyphenStream = FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix));
            _polyphenReader    = new PredictionCacheReader(polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
Esempio n. 7
0
        /// <summary>
        /// Opens the prediction cache at <paramref name="path"/> and records, for every reference index,
        /// the <c>PredictionCount</c> of the cache read for that index. Also stores the number of
        /// reference sequences found in the file header in <c>_numRefSeq</c>.
        /// </summary>
        /// <param name="path">path of the prediction cache file</param>
        /// <returns>dictionary keyed by reference index holding the prediction count of that reference</returns>
        private Dictionary <ushort, int> GetPredictionMatrixCount(string path)
        {
            var matrixCounts = new Dictionary<ushort, int>();

            using (var reader = new PredictionCacheReader(FileUtilities.GetReadStream(path)))
            {
                _numRefSeq = reader.FileHeader.Index.Size;

                for (ushort refIndex = 0; refIndex < _numRefSeq; refIndex++)
                {
                    matrixCounts[refIndex] = reader.Read(refIndex).PredictionCount;
                }
            }

            return matrixCounts;
        }
Esempio n. 8
0
        /// <summary>
        /// Opens the reference sequence and the SIFT / PolyPhen prediction caches, loads the transcript cache,
        /// and packs everything into a <c>DataBundle</c>. The transcript reader is disposed before returning;
        /// the other three readers are handed to the bundle still open.
        /// </summary>
        /// <param name="referencePath">path of the compressed reference sequence</param>
        /// <param name="cachePrefix">prefix handed to <c>CacheConstants</c> to derive the cache file paths</param>
        /// <returns>a bundle holding the readers, the transcript cache data, the cache and its source</returns>
        public static DataBundle GetDataBundle(string referencePath, string cachePrefix)
        {
            var referenceStream = FileUtilities.GetReadStream(referencePath);
            var sequenceReader  = new CompressedSequenceReader(referenceStream);

            var siftStream = FileUtilities.GetReadStream(CacheConstants.SiftPath(cachePrefix));
            var siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyPhenStream = FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(cachePrefix));
            var polyPhenReader = new PredictionCacheReader(polyPhenStream, PredictionCacheReader.PolyphenDescriptions);

            var transcriptStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(cachePrefix));

            using (var transcriptReader = new TranscriptCacheReader(transcriptStream))
            {
                VC.TranscriptCacheData cacheData = transcriptReader.Read(sequenceReader.RefIndexToChromosome);
                VC.TranscriptCache     cache     = cacheData.GetCache();
                Source                 source    = transcriptReader.Header.Source;

                return new DataBundle(sequenceReader, siftReader, polyPhenReader, cacheData, cache, source);
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Builds the transcript annotation provider from the cache files that share <paramref name="pathPrefix"/>:
        /// initiates the transcript cache and creates the SIFT and PolyPhen prediction readers.
        /// </summary>
        /// <param name="pathPrefix">prefix handed to <c>CacheConstants</c> to derive the transcript, SIFT and PolyPhen paths</param>
        /// <param name="sequenceProvider">supplies the sequence, the reference index dictionary and the assembly</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)
        {
            Name      = "Transcript annotation provider";
            _sequence = sequenceProvider.Sequence;

            // The transcript stream is not stored anywhere, so release it once the cache has been initiated
            // instead of leaking the handle. Stream.Dispose may be called more than once, so this is safe
            // even if InitiateCache already closes the stream.
            // NOTE(review): assumes InitiateCache fully consumes the stream before returning - confirm.
            using (var transcriptStream = PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)))
            {
                (_transcriptCache, TranscriptIntervalArrays, VepVersion) = InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly);
            }

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // The prediction streams are handed to readers that are stored in fields, so they are not disposed here.
            var siftStream = PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix));
            _siftReader    = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyphenStream = PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix));
            _polyphenReader    = new PredictionCacheReader(polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
        /// <summary>
        /// Loads the predictions of one chromosome from both readers and combines them via the
        /// array-based <c>CombinePredictions</c> overload.
        /// </summary>
        /// <param name="logger">receives the progress messages</param>
        /// <param name="chromosome">chromosome whose <c>Index</c> selects the predictions to load</param>
        /// <param name="description">label used in the log messages and passed on to the combining overload</param>
        /// <param name="reader">reader for the first prediction cache</param>
        /// <param name="reader2">reader for the second prediction cache</param>
        /// <returns>the combined predictions and, as offset, the number of predictions loaded from <paramref name="reader"/></returns>
        private static (Prediction[] Predictions, int Offset) CombinePredictions(ILogger logger, IChromosome chromosome,
                                                                                 string description, PredictionCacheReader reader, PredictionCacheReader reader2)
        {
            logger.Write($"- load {description} predictions... ");

            var refIndex          = chromosome.Index;
            var firstPredictions  = reader.GetPredictions(refIndex);
            var secondPredictions = reader2.GetPredictions(refIndex);

            logger.WriteLine("finished.");

            var merged = CombinePredictions(logger, description, firstPredictions, secondPredictions);

            // the offset is the size of the first prediction set
            return (merged, firstPredictions.Length);
        }