/// <summary>
/// Writes a string array with <c>TranscriptCacheWriter.WriteItems</c> into an in-memory
/// stream, reads it back with <c>TranscriptCacheReader.ReadItems</c>, and verifies that
/// the strings read equal the strings written.
/// </summary>
public void ReadItems_EndToEnd()
{
    var expectedStrings = new[] { "Huey", "Duey", "Louie" };
    string[] observedStrings;

    using (var memoryStream = new MemoryStream())
    {
        // ReSharper disable AccessToDisposedClosure
        // NOTE(review): the trailing 'true' presumably keeps the stream open after the
        // writer is disposed (the stream is rewound and read below) - confirm.
        using (var binaryWriter = new ExtendedBinaryWriter(memoryStream, Encoding.UTF8, true))
        {
            TranscriptCacheWriter.WriteItems(binaryWriter, expectedStrings, item => binaryWriter.WriteOptAscii(item));
        }

        // rewind so the reader starts at the first written byte
        memoryStream.Position = 0;

        using (var binaryReader = new ExtendedBinaryReader(memoryStream))
        {
            observedStrings = TranscriptCacheReader.ReadItems(binaryReader, () => binaryReader.ReadAsciiString());
        }
        // ReSharper restore AccessToDisposedClosure
    }

    Assert.NotNull(observedStrings);
    Assert.Equal(expectedStrings, observedStrings);
}
/// <summary>
/// Serializes the expected cache data with a <c>TranscriptCacheWriter</c>, deserializes it
/// with a <c>TranscriptCacheReader</c> from the same in-memory stream, and compares every
/// section of the observed cache against the expected one.
/// </summary>
public void TranscriptCacheReader_EndToEnd()
{
    TranscriptCacheData observedCache;

    using (var memoryStream = new MemoryStream())
    {
        using (var cacheWriter = new TranscriptCacheWriter(memoryStream, _expectedHeader, true))
        {
            cacheWriter.Write(_expectedCacheData);
        }

        // rewind so the reader starts at the header
        memoryStream.Position = 0;

        using (var cacheReader = new TranscriptCacheReader(memoryStream))
        {
            observedCache = cacheReader.Read(_refIndexToChromosome);
        }
    }

    Assert.NotNull(observedCache);
    Assert.Equal(_expectedCacheData.PeptideSeqs, observedCache.PeptideSeqs);

    CheckChromosomeIntervals(_expectedCacheData.Genes, observedCache.Genes);

    CheckIntervalArrays(_expectedCacheData.RegulatoryRegionIntervalArrays, observedCache.RegulatoryRegionIntervalArrays);
    CheckIntervalArrays(_expectedCacheData.TranscriptIntervalArrays, observedCache.TranscriptIntervalArrays);

    CheckIntervals(_expectedCacheData.TranscriptRegions, observedCache.TranscriptRegions);
    CheckIntervals(_expectedCacheData.Mirnas, observedCache.Mirnas);
}
/// <summary>
/// Loads the reference and the transcript cache, derives the Entrez/Ensembl id to symbol
/// lookups from the cache, then parses the input file and writes the parsed items with an
/// <c>NsaWriter</c> (data file plus index file) into the output directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // both output files share the "<name>_<version>" prefix and the SA suffix
    string outFileName = $"{version.Name}_{version.Version}";
    string nsaFileName = outFileName + SaCommon.SaFileSuffix;

    TranscriptCacheData transcriptData;
    using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
    {
        transcriptData = cacheReader.Read(referenceProvider.RefIndexToChromosome);
    }

    (var entrezToHgnc, var ensemblToHgnc) = PrimateAiUtilities.GetIdToSymbols(transcriptData);

    using (var parser = new PrimateAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, entrezToHgnc, ensemblToHgnc))
    using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
    using (var writer = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.PrimateAiTag, true, true, SaCommon.SchemaVersion, false))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Reads a transcript cache from <paramref name="stream"/>.
/// </summary>
/// <param name="stream">Stream handed to the <c>TranscriptCacheReader</c>.</param>
/// <param name="chromosomeIndexDictionary">Reference index to chromosome lookup passed to <c>Read</c>.</param>
/// <param name="genomeAssembly">Genome assembly passed to the reader constructor.</param>
/// <param name="numRefSeq">Number of reference sequences passed to the reader constructor.</param>
/// <returns>The cache produced by the reader; the reader is disposed before returning.</returns>
private static TranscriptCache InitiateCache(Stream stream, IDictionary<ushort, IChromosome> chromosomeIndexDictionary, GenomeAssembly genomeAssembly, ushort numRefSeq)
{
    using (var cacheReader = new TranscriptCacheReader(stream, genomeAssembly, numRefSeq))
    {
        return cacheReader.Read(chromosomeIndexDictionary);
    }
}
/// <summary>
/// Reads the transcript cache from <paramref name="fileStream"/> and returns only its
/// transcript interval arrays.
/// </summary>
/// <param name="fileStream">Stream handed to the <c>TranscriptCacheReader</c>.</param>
/// <param name="refIndexToChromosome">Reference index to chromosome lookup passed to <c>Read</c>.</param>
/// <returns>The <c>TranscriptIntervalArrays</c> of the cache data that was read.</returns>
public static IntervalArray<ITranscript>[] ReadCache(FileStream fileStream, IDictionary<ushort, IChromosome> refIndexToChromosome)
{
    using (var cacheReader = new TranscriptCacheReader(fileStream))
    {
        var cacheData = cacheReader.Read(refIndexToChromosome);
        return cacheData.TranscriptIntervalArrays;
    }
}
/// <summary>
/// Verifies that <c>TranscriptCacheReader.CheckGuard</c> throws an
/// <see cref="InvalidDataException"/> when the stream contains the integer 7.
/// </summary>
public void CheckGuard_InvalidGuard()
{
    Assert.Throws<InvalidDataException>(() =>
    {
        using (var memoryStream = new MemoryStream())
        {
            using (var binaryWriter = new ExtendedBinaryWriter(memoryStream, Encoding.UTF8, true))
            {
                // the value this test expects CheckGuard to reject
                binaryWriter.Write(7);
            }

            memoryStream.Position = 0;

            using (var binaryReader = new ExtendedBinaryReader(memoryStream))
            {
                TranscriptCacheReader.CheckGuard(binaryReader);
            }
        }
    });
}
/// <summary>
/// Reads the test transcript cache, builds the splice intervals from it, and checks that
/// exactly one entry is produced and that the entry at index/key 2 holds two intervals.
/// </summary>
public void GetSpliceIntervals_standard()
{
    using (var cacheReader = new TranscriptCacheReader(GetCacheStream()))
    {
        var sequenceProvider = GetCacheSequenceProvider();
        var cacheData        = cacheReader.Read(sequenceProvider.RefIndexToChromosome);

        var spliceIntervals = SpliceUtilities.GetSpliceIntervals(sequenceProvider, cacheData);
        Assert.Single(spliceIntervals);

        // given 2 exons, there should be 2 splice intervals
        var observedIntervals = spliceIntervals[2];
        Assert.Equal(2, observedIntervals.Array.Length);
    }
}
/// <summary>
/// Loads the transcript cache data stored at <paramref name="cachePath"/>.
/// </summary>
/// <param name="cachePath">Path of the transcript cache file.</param>
/// <param name="refIndexToChromosome">Reference index to chromosome lookup passed to <c>Read</c>.</param>
/// <returns>The cache data read from the file.</returns>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="cachePath"/> does not exist.</exception>
public static TranscriptCacheData GetCache(string cachePath, IDictionary<ushort, IChromosome> refIndexToChromosome)
{
    bool cacheExists = File.Exists(cachePath);
    if (!cacheExists) throw new FileNotFoundException($"Could not find {cachePath}");

    using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath)))
    {
        return cacheReader.Read(refIndexToChromosome);
    }
}
/// <summary>
/// Reads a transcript cache from <paramref name="stream"/> and reports the VEP version
/// recorded in its custom header.
/// </summary>
/// <param name="stream">Stream handed to the <c>TranscriptCacheReader</c>.</param>
/// <param name="refIndexToChromosome">Reference index to chromosome lookup passed to <c>Read</c>.</param>
/// <param name="refGenomeAssembly">Genome assembly passed to <c>CheckHeaderVersion</c> together with the header.</param>
/// <returns>
/// The cache and the VEP version; the version is 0 when the custom header is not a
/// <c>TranscriptCacheCustomHeader</c>.
/// </returns>
private static (TranscriptCache cache, ushort vepVersion) InitiateCache(Stream stream, IDictionary<ushort, IChromosome> refIndexToChromosome, GenomeAssembly refGenomeAssembly)
{
    using (var cacheReader = new TranscriptCacheReader(stream))
    {
        // fall back to 0 when the custom header is missing or of a different type
        ushort vepVersion = cacheReader.Header.CustomHeader is TranscriptCacheCustomHeader customHeader
            ? customHeader.VepVersion
            : (ushort)0;

        // the header is checked before the (potentially large) cache body is read
        CheckHeaderVersion(cacheReader.Header, refGenomeAssembly);

        TranscriptCache cache = cacheReader.Read(refIndexToChromosome).GetCache();
        return (cache, vepVersion);
    }
}
/// <summary>
/// Reads transcript cache data from <paramref name="stream"/> and returns the cache built
/// from it, the transcript interval arrays of the raw data, and the VEP version stored in
/// the header.
/// </summary>
/// <param name="stream">Stream handed to the <c>TranscriptCacheReader</c>.</param>
/// <param name="refIndexToChromosome">Reference index to chromosome lookup passed to <c>Read</c>.</param>
/// <param name="refAssembly">Genome assembly passed to <c>CheckHeaderVersion</c> together with the header.</param>
/// <returns>A tuple of the cache, the transcript interval arrays, and the VEP version.</returns>
private static (TranscriptCache Cache, IntervalArray<ITranscript>[] TranscriptIntervalArrays, ushort VepVersion) InitiateCache(Stream stream, IDictionary<ushort, IChromosome> refIndexToChromosome, GenomeAssembly refAssembly)
{
    using (var cacheReader = new TranscriptCacheReader(stream))
    {
        ushort vepVersion = cacheReader.Header.Custom.VepVersion;

        // the header is checked before the cache body is read
        CheckHeaderVersion(cacheReader.Header, refAssembly);

        TranscriptCacheData cacheData = cacheReader.Read(refIndexToChromosome);
        TranscriptCache cache         = cacheData.GetCache();

        return (cache, cacheData.TranscriptIntervalArrays, vepVersion);
    }
}
/// <summary>
/// Loads two transcript caches and logs progress around the operation.
/// </summary>
/// <param name="logger">Receives the "loading" and "finished" progress messages.</param>
/// <param name="transcriptPath">Path of the first transcript cache.</param>
/// <param name="transcriptPath2">Path of the second transcript cache.</param>
/// <param name="refIndexToChromosome">Reference index to chromosome lookup passed to both reads.</param>
/// <returns>The cache data read from the first and the second path, in that order.</returns>
private static (TranscriptCacheData Cache, TranscriptCacheData Cache2) LoadTranscriptCaches(ILogger logger, string transcriptPath, string transcriptPath2, IDictionary<ushort, IChromosome> refIndexToChromosome)
{
    TranscriptCacheData firstCache;
    TranscriptCacheData secondCache;

    logger.Write("- loading transcript caches... ");

    // both readers are opened before either cache body is read
    using (var firstReader = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptPath)))
    {
        using (var secondReader = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptPath2)))
        {
            firstCache  = firstReader.Read(refIndexToChromosome);
            secondCache = secondReader.Read(refIndexToChromosome);
        }
    }

    logger.WriteLine("finished.");

    return (firstCache, secondCache);
}
/// <summary>
/// Opens the reference sequence reader and the SIFT/PolyPhen prediction readers, loads the
/// transcript cache for <paramref name="cachePrefix"/>, and packages everything into a
/// <c>DataBundle</c>.
/// </summary>
/// <param name="referencePath">Path of the compressed reference file.</param>
/// <param name="cachePrefix">Prefix from which the SIFT, PolyPhen and transcript cache paths are derived.</param>
/// <returns>
/// A bundle holding the three readers (left open and handed to the bundle), the raw cache
/// data, the cache built from it, and the source recorded in the transcript cache header.
/// </returns>
public static DataBundle GetDataBundle(string referencePath, string cachePrefix)
{
    var sequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(referencePath));

    var siftStream = FileUtilities.GetReadStream(CacheConstants.SiftPath(cachePrefix));
    var siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

    var polyPhenStream = FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(cachePrefix));
    var polyPhenReader = new PredictionCacheReader(polyPhenStream, PredictionCacheReader.PolyphenDescriptions);

    VC.TranscriptCacheData cacheData;
    VC.TranscriptCache cache;
    Source source;

    var transcriptStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(cachePrefix));
    using (var cacheReader = new TranscriptCacheReader(transcriptStream))
    {
        cacheData = cacheReader.Read(sequenceReader.RefIndexToChromosome);
        cache     = cacheData.GetCache();
        source    = cacheReader.Header.Source;
    }

    return new DataBundle(sequenceReader, siftReader, polyPhenReader, cacheData, cache, source);
}
/// <summary>
/// Loads the reference and transcript cache, builds the splice intervals and the mapping
/// from SpliceAI gene symbols to Nirvana gene symbols, then parses the SpliceAI input file
/// and writes the parsed items with an <c>NsaWriter</c> (data file plus index file).
/// </summary>
/// <returns><c>ExitCodes.Success</c> once the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    TranscriptCacheData transcriptData;
    using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
    {
        transcriptData = cacheReader.Read(referenceProvider.RefIndexToChromosome);
    }

    var spliceIntervals      = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);
    var nirEnstToGeneSymbols = SpliceUtilities.GetEnstToGeneSymbols(referenceProvider, transcriptData);

    Dictionary<string, string> spliceAiEnstToGeneSymbols;
    using (var geneInfoReader = new StreamReader(GZipUtilities.GetAppropriateReadStream(_geneInfoFile)))
    {
        spliceAiEnstToGeneSymbols = SpliceUtilities.GetSpliceAiGeneSymbols(geneInfoReader);
    }

    var symbolMapping = SpliceUtilities.GetSymbolMapping(spliceAiEnstToGeneSymbols, nirEnstToGeneSymbols);
    Console.WriteLine($"Mapped {symbolMapping.Count} spliceAI gene symbols to Nirvana gene symbols (out of {spliceAiEnstToGeneSymbols.Count})");

    var version = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // both output files share the "<name>_<version>" prefix and the SA suffix
    string outFileName = $"{version.Name}_{version.Version}";
    string nsaFileName = outFileName + SaCommon.SaFileSuffix;

    using (var parser = new SpliceAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, spliceIntervals, symbolMapping))
    using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
    using (var writer = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))
    {
        writer.Write(parser.GetItems());
    }

    Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");

    return ExitCodes.Success;
}
/// <summary>
/// Loads the reference and transcript cache, builds the splice intervals and the gene
/// forest, loads the gene symbol synonyms, then parses the SpliceAI input file and writes
/// the parsed items with an <c>NsaWriter</c> (data file plus index file).
/// </summary>
/// <returns><c>ExitCodes.Success</c> once the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    TranscriptCacheData transcriptData;
    using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
    {
        transcriptData = cacheReader.Read(referenceProvider.RefIndexToChromosome);
    }

    var spliceIntervals = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);
    var geneForest      = SpliceUtilities.GetGeneForest(transcriptData);
    Console.WriteLine("Loaded transcripts and generated splice intervals.");

    Dictionary<string, List<string>> geneSymbolSynonyms;
    using (var geneInfoParser = new GeneInfoParser(GZipUtilities.GetAppropriateStreamReader(_geneInfoFile)))
    {
        geneSymbolSynonyms = geneInfoParser.GetGeneSymbolSynonyms();
    }
    Console.WriteLine("Loaded gene symbol synonyms");

    var version = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // both output files share the "<name>_<version>" prefix and the SA suffix
    string outFileName = $"{version.Name}_{version.Version}";
    string nsaFileName = outFileName + SaCommon.SaFileSuffix;

    using (var parser = new SpliceAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, spliceIntervals, geneForest, geneSymbolSynonyms))
    using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
    {
        // NOTE(review): the writer (and the two ExtendedBinaryWriter wrappers) are never
        // disposed here; only the underlying streams are. Confirm that Write() flushes
        // everything it buffers, or wrap the writer in a using block if it is disposable.
        var writer = new NsaWriter(
            new ExtendedBinaryWriter(nsaStream),
            new ExtendedBinaryWriter(indexStream),
            version,
            referenceProvider,
            SaCommon.SpliceAiTag,
            true,
            true,
            SaCommon.SchemaVersion,
            false);

        writer.Write(parser.GetItems());
    }

    Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");

    return ExitCodes.Success;
}
/// <summary>
/// Loads the gene id to symbol lookup from the transcript cache, then parses the gnomAD
/// gene input file and writes the parsed items with an <c>NgaWriter</c> into the output
/// directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    Dictionary<string, string> geneIdToSymbols;

    using (var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_cachePrefix)))
    using (var cacheReader = new TranscriptCacheReader(cacheStream))
    using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_referenceSequncePath)))
    {
        geneIdToSymbols = LoadGenesFromCache(referenceProvider, cacheReader);
        Console.WriteLine($"Loaded {geneIdToSymbols.Count} gene symbols from cache.");
    }

    var version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
    var outFileName = $"{version.Name}_{version.Version}";

    using (var parser = new GnomadGeneParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), geneIdToSymbols))
    using (var ngaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.NgaFileSuffix)))
    using (var writer = new NgaWriter(ngaStream, version, SaCommon.GnomadGeneScoreTag, SaCommon.SchemaVersion, false))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Reads the transcript cache and builds a lookup from each gene's unversioned Ensembl id
/// to its symbol.
/// </summary>
/// <param name="refProvider">Supplies the reference index to chromosome lookup used for the read.</param>
/// <param name="cacheReader">Reader from which the transcript data is read.</param>
/// <returns>A dictionary keyed by Ensembl gene id (without version); genes with a null or empty id are skipped.</returns>
/// <exception cref="DataMisalignedException">Thrown when the same gene id appears with two different symbols.</exception>
private static Dictionary<string, string> LoadGenesFromCache(ReferenceSequenceProvider refProvider, TranscriptCacheReader cacheReader)
{
    var transcriptData = cacheReader.Read(refProvider.RefIndexToChromosome);
    var genes          = transcriptData.Genes;

    var geneIdToSymbols = new Dictionary<string, string>(genes.Length);

    foreach (var gene in genes)
    {
        var geneId = gene.EnsemblId.WithoutVersion;
        if (string.IsNullOrEmpty(geneId)) continue;

        if (geneIdToSymbols.TryGetValue(geneId, out var knownSymbol))
        {
            // a repeated id is only acceptable when it maps to the same symbol
            if (knownSymbol != gene.Symbol) throw new DataMisalignedException($"Multiple symbols found for {geneId}");
            continue;
        }

        geneIdToSymbols.Add(geneId, gene.Symbol);
    }

    return geneIdToSymbols;
}
/// <summary>
/// Reads the transcript cache data stored under <paramref name="transcriptCachePrefix"/>.
/// </summary>
/// <param name="refIndexToChromosome">Reference index to chromosome lookup passed to <c>Read</c>.</param>
/// <param name="transcriptCachePrefix">Prefix from which the transcript cache path is derived.</param>
/// <returns>The cache data read from the file; the reader is disposed before returning.</returns>
public static TranscriptCacheData GetTranscriptData(IDictionary<ushort, IChromosome> refIndexToChromosome, string transcriptCachePrefix)
{
    var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(transcriptCachePrefix));

    using (var cacheReader = new TranscriptCacheReader(cacheStream))
    {
        TranscriptCacheData cacheData = cacheReader.Read(refIndexToChromosome);
        return cacheData;
    }
}