예제 #1
0
        public void ReadItems_EndToEnd()
        {
            // Round trip: write three strings with TranscriptCacheWriter.WriteItems, read them back
            // with TranscriptCacheReader.ReadItems, and expect the same sequence.
            var      expected = new[] { "Huey", "Duey", "Louie" };
            string[] observed;

            using (var buffer = new MemoryStream())
            {
                // ReSharper disable AccessToDisposedClosure
                // NOTE(review): the trailing 'true' is presumably leaveOpen, so that 'buffer' survives
                // the writer being disposed - confirm against ExtendedBinaryWriter.
                using (var writer = new ExtendedBinaryWriter(buffer, Encoding.UTF8, true))
                {
                    TranscriptCacheWriter.WriteItems(writer, expected, item => writer.WriteOptAscii(item));
                }

                // rewind so the reader starts at the first written byte
                buffer.Position = 0;

                using (var reader = new ExtendedBinaryReader(buffer))
                {
                    observed = TranscriptCacheReader.ReadItems(reader, () => reader.ReadAsciiString());
                }
                // ReSharper restore AccessToDisposedClosure
            }

            Assert.NotNull(observed);
            Assert.Equal(expected, observed);
        }
예제 #2
0
        public void TranscriptCacheReader_EndToEnd()
        {
            // Round trip: serialize _expectedCacheData with TranscriptCacheWriter, deserialize it with
            // TranscriptCacheReader, and compare each component of the result against the input.
            TranscriptCacheData roundTripped;

            using (var buffer = new MemoryStream())
            {
                using (var cacheWriter = new TranscriptCacheWriter(buffer, _expectedHeader, true))
                {
                    cacheWriter.Write(_expectedCacheData);
                }

                // rewind so the reader starts at the first written byte
                buffer.Position = 0;

                using (var cacheReader = new TranscriptCacheReader(buffer))
                {
                    roundTripped = cacheReader.Read(_refIndexToChromosome);
                }
            }

            Assert.NotNull(roundTripped);

            // peptide sequences are compared directly; the interval-based members go through the check helpers
            Assert.Equal(_expectedCacheData.PeptideSeqs, roundTripped.PeptideSeqs);
            CheckChromosomeIntervals(_expectedCacheData.Genes, roundTripped.Genes);
            CheckIntervalArrays(_expectedCacheData.RegulatoryRegionIntervalArrays, roundTripped.RegulatoryRegionIntervalArrays);
            CheckIntervalArrays(_expectedCacheData.TranscriptIntervalArrays, roundTripped.TranscriptIntervalArrays);
            CheckIntervals(_expectedCacheData.TranscriptRegions, roundTripped.TranscriptRegions);
            CheckIntervals(_expectedCacheData.Mirnas, roundTripped.Mirnas);
        }
예제 #3
0
        /// <summary>
        /// Parses the PrimateAI input file and writes it out through an <c>NsaWriter</c>.
        /// The output file is named "{version.Name}_{version.Version}" plus <c>SaCommon.SaFileSuffix</c>
        /// and is created in <c>_outputDirectory</c>, together with an index file that carries the
        /// additional <c>SaCommon.IndexSufix</c> suffix.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once the writer has consumed all parsed items.</returns>
        private static ExitCodes ProgramExecution()
        {
            // The provider wraps the reference read stream opened here; dispose it when we are done
            // instead of leaving the stream open until the process exits.
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            {
                var    version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
                string outFileName = $"{version.Name}_{version.Version}";

                TranscriptCacheData transcriptData;

                using (var transcriptCacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
                {
                    transcriptData = transcriptCacheReader.Read(referenceProvider.RefIndexToChromosome);
                }

                // ID -> symbol lookups derived from the transcript cache; both are handed to the parser
                var (entrezToHgnc, ensemblToHgnc) = PrimateAiUtilities.GetIdToSymbols(transcriptData);

                using (var primateAiParser = new PrimateAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, entrezToHgnc, ensemblToHgnc))
                using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
                using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
                using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.PrimateAiTag, true, true, SaCommon.SchemaVersion, false))
                {
                    nsaWriter.Write(primateAiParser.GetItems());
                }

                return ExitCodes.Success;
            }
        }
예제 #4
0
        /// <summary>
        /// Reads a <c>TranscriptCache</c> from <paramref name="stream"/> using a temporary
        /// <c>TranscriptCacheReader</c>, which is disposed before returning.
        /// </summary>
        /// <param name="stream">Stream handed to the reader.</param>
        /// <param name="chromosomeIndexDictionary">Lookup passed to <c>TranscriptCacheReader.Read</c>.</param>
        /// <param name="genomeAssembly">Genome assembly forwarded to the reader's constructor.</param>
        /// <param name="numRefSeq">Value forwarded to the reader's constructor.</param>
        /// <returns>The cache produced by <c>TranscriptCacheReader.Read</c>.</returns>
        private static TranscriptCache InitiateCache(Stream stream,
                                                     IDictionary<ushort, IChromosome> chromosomeIndexDictionary, GenomeAssembly genomeAssembly, ushort numRefSeq)
        {
            using (var cacheReader = new TranscriptCacheReader(stream, genomeAssembly, numRefSeq))
            {
                return cacheReader.Read(chromosomeIndexDictionary);
            }
        }
예제 #5
0
 /// <summary>
 /// Reads the transcript cache from <paramref name="fileStream"/> and returns only its
 /// transcript interval arrays.
 /// </summary>
 /// <param name="fileStream">Stream handed to the <c>TranscriptCacheReader</c>.</param>
 /// <param name="refIndexToChromosome">Lookup passed to <c>TranscriptCacheReader.Read</c>.</param>
 /// <returns>The <c>TranscriptIntervalArrays</c> member of the data that was read.</returns>
 public static IntervalArray<ITranscript>[] ReadCache(FileStream fileStream, IDictionary<ushort, IChromosome> refIndexToChromosome)
 {
     using (var cacheReader = new TranscriptCacheReader(fileStream))
     {
         var cacheData = cacheReader.Read(refIndexToChromosome);
         return cacheData.TranscriptIntervalArrays;
     }
 }
예제 #6
0
 public void CheckGuard_InvalidGuard()
 {
     // A stream holding just the integer 7 must be rejected by CheckGuard with InvalidDataException.
     Assert.Throws<InvalidDataException>(() =>
     {
         using (var buffer = new MemoryStream())
         {
             using (var writer = new ExtendedBinaryWriter(buffer, Encoding.UTF8, true))
             {
                 writer.Write(7);
             }

             // rewind so CheckGuard reads the value that was just written
             buffer.Position = 0;

             using (var reader = new ExtendedBinaryReader(buffer))
             {
                 TranscriptCacheReader.CheckGuard(reader);
             }
         }
     });
 }
예제 #7
0
        public void GetSpliceIntervals_standard()
        {
            // Reads the test cache, derives splice intervals from it, and checks how many were produced.
            using (var cacheReader = new TranscriptCacheReader(GetCacheStream()))
            {
                var sequenceProvider = GetCacheSequenceProvider();
                var cacheData        = cacheReader.Read(sequenceProvider.RefIndexToChromosome);
                var intervalsByIndex = SpliceUtilities.GetSpliceIntervals(sequenceProvider, cacheData);

                // the result must contain exactly one entry
                Assert.Single(intervalsByIndex);

                // given 2 exons, there should be 2 splice intervals
                // NOTE(review): the entry is fetched with 2 - presumably a reference index/key; confirm
                var intervalArray = intervalsByIndex[2];
                Assert.Equal(2, intervalArray.Array.Length);
            }
        }
예제 #8
0
        /// <summary>
        /// Loads the transcript cache stored at <paramref name="cachePath"/>.
        /// </summary>
        /// <param name="cachePath">Path of the transcript cache file.</param>
        /// <param name="refIndexToChromosome">Lookup passed to <c>TranscriptCacheReader.Read</c>.</param>
        /// <returns>The data returned by <c>TranscriptCacheReader.Read</c>.</returns>
        /// <exception cref="FileNotFoundException">Thrown when <paramref name="cachePath"/> does not exist.</exception>
        public static TranscriptCacheData GetCache(string cachePath,
                                                   IDictionary<ushort, IChromosome> refIndexToChromosome)
        {
            if (!File.Exists(cachePath))
            {
                // Also pass the path as fileName so FileNotFoundException.FileName is populated
                // for handlers and logs; the message itself is unchanged.
                throw new FileNotFoundException($"Could not find {cachePath}", cachePath);
            }

            using (var reader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath)))
            {
                return reader.Read(refIndexToChromosome);
            }
        }
        /// <summary>
        /// Reads a transcript cache from <paramref name="stream"/>, validating its header against
        /// <paramref name="refGenomeAssembly"/> via <c>CheckHeaderVersion</c> before the data is read.
        /// </summary>
        /// <param name="stream">Stream handed to the <c>TranscriptCacheReader</c>.</param>
        /// <param name="refIndexToChromosome">Lookup passed to <c>TranscriptCacheReader.Read</c>.</param>
        /// <param name="refGenomeAssembly">Assembly passed to <c>CheckHeaderVersion</c>.</param>
        /// <returns>
        /// The cache and the VEP version taken from the custom header; the version falls back to 0
        /// when the custom header is not a <c>TranscriptCacheCustomHeader</c>.
        /// </returns>
        private static (TranscriptCache cache, ushort vepVersion) InitiateCache(Stream stream, IDictionary<ushort, IChromosome> refIndexToChromosome, GenomeAssembly refGenomeAssembly)
        {
            using (var cacheReader = new TranscriptCacheReader(stream))
            {
                var header = cacheReader.Header;

                // 'as' yields null for any other header type, in which case the version defaults to 0
                ushort vepVersion = (header.CustomHeader as TranscriptCacheCustomHeader)?.VepVersion ?? 0;

                CheckHeaderVersion(cacheReader.Header, refGenomeAssembly);
                TranscriptCache cache = cacheReader.Read(refIndexToChromosome).GetCache();

                return (cache, vepVersion);
            }
        }
        /// <summary>
        /// Reads a transcript cache from <paramref name="stream"/>, validating its header against
        /// <paramref name="refAssembly"/> via <c>CheckHeaderVersion</c> before the data is read.
        /// </summary>
        /// <param name="stream">Stream handed to the <c>TranscriptCacheReader</c>.</param>
        /// <param name="refIndexToChromosome">Lookup passed to <c>TranscriptCacheReader.Read</c>.</param>
        /// <param name="refAssembly">Assembly passed to <c>CheckHeaderVersion</c>.</param>
        /// <returns>
        /// The cache built from the data, the data's transcript interval arrays, and the VEP version
        /// stored in the header.
        /// </returns>
        private static (TranscriptCache Cache, IntervalArray<ITranscript>[] TranscriptIntervalArrays, ushort VepVersion) InitiateCache(Stream stream,
                                                                                                                                        IDictionary<ushort, IChromosome> refIndexToChromosome, GenomeAssembly refAssembly)
        {
            ushort              headerVepVersion;
            TranscriptCacheData data;
            TranscriptCache     builtCache;

            using (var cacheReader = new TranscriptCacheReader(stream))
            {
                // grab the version and validate the header before reading the data
                headerVepVersion = cacheReader.Header.Custom.VepVersion;
                CheckHeaderVersion(cacheReader.Header, refAssembly);

                data       = cacheReader.Read(refIndexToChromosome);
                builtCache = data.GetCache();
            }

            return (builtCache, data.TranscriptIntervalArrays, headerVepVersion);
        }
        /// <summary>
        /// Loads two transcript caches, logging progress before and after.
        /// Both files are opened before either one is read.
        /// </summary>
        /// <param name="logger">Receives the progress messages.</param>
        /// <param name="transcriptPath">Path of the first transcript cache.</param>
        /// <param name="transcriptPath2">Path of the second transcript cache.</param>
        /// <param name="refIndexToChromosome">Lookup passed to both <c>Read</c> calls.</param>
        /// <returns>The data read from the first and second cache, in that order.</returns>
        private static (TranscriptCacheData Cache, TranscriptCacheData Cache2) LoadTranscriptCaches(ILogger logger,
                                                                                                    string transcriptPath, string transcriptPath2, IDictionary<ushort, IChromosome> refIndexToChromosome)
        {
            logger.Write("- loading transcript caches... ");

            TranscriptCacheData firstCache;
            TranscriptCacheData secondCache;

            using (var firstReader = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptPath)))
            using (var secondReader = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptPath2)))
            {
                firstCache  = firstReader.Read(refIndexToChromosome);
                secondCache = secondReader.Read(refIndexToChromosome);
            }

            logger.WriteLine("finished.");
            return (firstCache, secondCache);
        }
예제 #12
0
        /// <summary>
        /// Opens the reference sequence, the SIFT and PolyPhen prediction caches and the transcript
        /// cache, and packages them into a <c>DataBundle</c>.
        /// </summary>
        /// <param name="referencePath">Path of the compressed reference sequence file.</param>
        /// <param name="cachePrefix">Prefix from which <c>CacheConstants</c> derives the cache paths.</param>
        /// <returns>
        /// A bundle holding the three readers (left open), the transcript cache data, the cache built
        /// from it, and the source recorded in the transcript cache header.
        /// </returns>
        public static DataBundle GetDataBundle(string referencePath, string cachePrefix)
        {
            // these readers are handed to the bundle, so they are intentionally not disposed here
            var referenceStream = FileUtilities.GetReadStream(referencePath);
            var sequenceReader  = new CompressedSequenceReader(referenceStream);

            var siftStream = FileUtilities.GetReadStream(CacheConstants.SiftPath(cachePrefix));
            var siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyPhenStream = FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(cachePrefix));
            var polyPhenReader = new PredictionCacheReader(polyPhenStream, PredictionCacheReader.PolyphenDescriptions);

            VC.TranscriptCacheData transcriptData;
            VC.TranscriptCache     transcriptCache;
            Source                 transcriptSource;

            // the transcript reader is only needed while loading, so it is closed right away
            using (var transcriptReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(cachePrefix))))
            {
                transcriptData   = transcriptReader.Read(sequenceReader.RefIndexToChromosome);
                transcriptCache  = transcriptData.GetCache();
                transcriptSource = transcriptReader.Header.Source;
            }

            return new DataBundle(sequenceReader, siftReader, polyPhenReader, transcriptData, transcriptCache, transcriptSource);
        }
예제 #13
0
        /// <summary>
        /// Parses the SpliceAI input file and writes it out through an <c>NsaWriter</c>.
        /// Splice intervals and the ENST-to-gene-symbol lookup come from the transcript cache; the
        /// SpliceAI gene symbols come from <c>_geneInfoFile</c>, and the two are reconciled with
        /// <c>SpliceUtilities.GetSymbolMapping</c>. The output file is named
        /// "{version.Name}_{version.Version}" plus <c>SaCommon.SaFileSuffix</c> and is created in
        /// <c>_outputDirectory</c>, together with an index file carrying <c>SaCommon.IndexSufix</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once the writer has consumed all parsed items.</returns>
        private static ExitCodes ProgramExecution()
        {
            // The provider wraps the reference read stream opened here; dispose it when we are done
            // instead of leaving the stream open until the process exits.
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            {
                TranscriptCacheData transcriptData;

                using (var transcriptCacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
                {
                    transcriptData = transcriptCacheReader.Read(referenceProvider.RefIndexToChromosome);
                }

                var spliceIntervals      = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);
                var nirEnstToGeneSymbols = SpliceUtilities.GetEnstToGeneSymbols(referenceProvider, transcriptData);

                Dictionary<string, string> spliceAiEnstToGeneSymbols;

                using (var reader = new StreamReader(GZipUtilities.GetAppropriateReadStream(_geneInfoFile)))
                {
                    spliceAiEnstToGeneSymbols = SpliceUtilities.GetSpliceAiGeneSymbols(reader);
                }

                var spliceAiToNirvanaGeneSymbols =
                    SpliceUtilities.GetSymbolMapping(spliceAiEnstToGeneSymbols, nirEnstToGeneSymbols);

                Console.WriteLine($"Mapped {spliceAiToNirvanaGeneSymbols.Count} spliceAI gene symbols to Nirvana gene symbols (out of {spliceAiEnstToGeneSymbols.Count})");

                var    version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
                string outFileName = $"{version.Name}_{version.Version}";

                using (var spliceAiParser = new SpliceAiParser(
                           GZipUtilities.GetAppropriateReadStream(_inputFile),
                           referenceProvider, spliceIntervals, spliceAiToNirvanaGeneSymbols))
                using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
                using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
                using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))
                {
                    nsaWriter.Write(spliceAiParser.GetItems());
                }

                Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");
                return ExitCodes.Success;
            }
        }
예제 #14
0
        /// <summary>
        /// Parses the SpliceAI input file and writes it out through an <c>NsaWriter</c>.
        /// Splice intervals and the gene forest come from the transcript cache; gene symbol synonyms
        /// come from <c>_geneInfoFile</c>. The output file is named "{version.Name}_{version.Version}"
        /// plus <c>SaCommon.SaFileSuffix</c> and is created in <c>_outputDirectory</c>, together with
        /// an index file carrying <c>SaCommon.IndexSufix</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once the writer has consumed all parsed items.</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            TranscriptCacheData cacheData;
            using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
            {
                cacheData = cacheReader.Read(referenceProvider.RefIndexToChromosome);
            }

            var spliceIntervals = SpliceUtilities.GetSpliceIntervals(referenceProvider, cacheData);
            var geneForest      = SpliceUtilities.GetGeneForest(cacheData);
            Console.WriteLine("Loaded transcripts and generated splice intervals.");

            Dictionary<string, List<string>> symbolSynonyms;
            using (var geneInfoParser = new GeneInfoParser(GZipUtilities.GetAppropriateStreamReader(_geneInfoFile)))
            {
                symbolSynonyms = geneInfoParser.GetGeneSymbolSynonyms();
            }

            Console.WriteLine("Loaded gene symbol synonyms");

            var    version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string outFileName = $"{version.Name}_{version.Version}";

            using (var spliceAiParser = new SpliceAiParser(
                       GZipUtilities.GetAppropriateReadStream(_inputFile),
                       referenceProvider, spliceIntervals, geneForest, symbolSynonyms))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            {
                // NOTE(review): the writer and the two ExtendedBinaryWriter wrappers are not disposed
                // here; only the underlying streams are - confirm Write() leaves nothing buffered.
                var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false);
                nsaWriter.Write(spliceAiParser.GetItems());
            }

            Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");
            return ExitCodes.Success;
        }
예제 #15
0
        /// <summary>
        /// Parses the gnomAD gene score input file and writes it out through an <c>NgaWriter</c>.
        /// Gene symbols are first loaded from the transcript cache; the output file is named
        /// "{version.Name}_{version.Version}" plus <c>SaCommon.NgaFileSuffix</c> and is created in
        /// <c>_outputDirectory</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once the writer has consumed all parsed items.</returns>
        private static ExitCodes ProgramExecution()
        {
            Dictionary<string, string> symbolsByGeneId;

            // the cache and reference are only needed while collecting gene symbols
            using (var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_cachePrefix)))
            using (var cacheReader = new TranscriptCacheReader(cacheStream))
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_referenceSequncePath)))
            {
                symbolsByGeneId = LoadGenesFromCache(referenceProvider, cacheReader);
                Console.WriteLine($"Loaded {symbolsByGeneId.Count} gene symbols from cache.");
            }

            var version        = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            var outputFileName = $"{version.Name}_{version.Version}";
            var outputPath     = Path.Combine(_outputDirectory, outputFileName + SaCommon.NgaFileSuffix);

            using (var geneParser = new GnomadGeneParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), symbolsByGeneId))
            using (var outputStream = FileUtilities.GetCreateStream(outputPath))
            using (var geneWriter = new NgaWriter(outputStream, version, SaCommon.GnomadGeneScoreTag, SaCommon.SchemaVersion, false))
            {
                geneWriter.Write(geneParser.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #16
0
        /// <summary>
        /// Builds a lookup from unversioned Ensembl gene ID to gene symbol for every gene in the
        /// transcript cache. Genes whose unversioned Ensembl ID is null or empty are skipped.
        /// </summary>
        /// <param name="refProvider">Supplies the <c>RefIndexToChromosome</c> lookup for the read.</param>
        /// <param name="cacheReader">Reader from which the transcript data is read.</param>
        /// <returns>The gene ID to gene symbol dictionary.</returns>
        /// <exception cref="DataMisalignedException">
        /// Thrown when the same gene ID appears with two different symbols.
        /// </exception>
        private static Dictionary<string, string> LoadGenesFromCache(ReferenceSequenceProvider refProvider, TranscriptCacheReader cacheReader)
        {
            var cacheData   = cacheReader.Read(refProvider.RefIndexToChromosome);
            var symbolsById = new Dictionary<string, string>(cacheData.Genes.Length);

            foreach (var gene in cacheData.Genes)
            {
                var geneId = gene.EnsemblId.WithoutVersion;
                if (string.IsNullOrEmpty(geneId))
                {
                    continue;
                }

                // first sighting of this ID: record it and move on
                if (!symbolsById.TryGetValue(geneId, out string knownSymbol))
                {
                    symbolsById.Add(geneId, gene.Symbol);
                    continue;
                }

                // repeated IDs are fine as long as they agree on the symbol
                if (knownSymbol != gene.Symbol)
                {
                    throw new DataMisalignedException($"Multiple symbols found for {geneId}");
                }
            }

            return symbolsById;
        }
예제 #17
0
 /// <summary>
 /// Reads the transcript cache whose path is derived from <paramref name="transcriptCachePrefix"/>.
 /// </summary>
 /// <param name="refIndexToChromosome">Lookup passed to <c>TranscriptCacheReader.Read</c>.</param>
 /// <param name="transcriptCachePrefix">Prefix handed to <c>CacheConstants.TranscriptPath</c>.</param>
 /// <returns>The data returned by <c>TranscriptCacheReader.Read</c>.</returns>
 public static TranscriptCacheData GetTranscriptData(IDictionary<ushort, IChromosome> refIndexToChromosome, string transcriptCachePrefix)
 {
     var cachePath = CacheConstants.TranscriptPath(transcriptCachePrefix);

     using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath)))
     {
         return cacheReader.Read(refIndexToChromosome);
     }
 }