/// <summary>
/// Serializes a <see cref="CacheHeader"/> into an in-memory stream, reads it back,
/// and checks that the transcript source, creation time, genome assembly and
/// VEP version all match what was written.
/// </summary>
public void CacheHeader_EndToEnd()
{
    // Expected values; the extremes (MaxValue) exercise the full width of each field.
    const Source expectedTranscriptSource     = Source.BothRefSeqAndEnsembl;
    const long expectedCreationTimeTicks      = long.MaxValue;
    const GenomeAssembly expectedAssembly     = GenomeAssembly.hg19;
    const ushort expectedVepVersion           = ushort.MaxValue;

    var headerToWrite = new CacheHeader(
        new Header("VEP", 1, 2, expectedTranscriptSource, expectedCreationTimeTicks, expectedAssembly),
        new TranscriptCacheCustomHeader(expectedVepVersion, 0));

    CacheHeader observedHeader;

    using (var stream = new MemoryStream())
    {
        // leaveOpen = true so the memory stream is still usable after the writer is disposed
        using (var binaryWriter = new BinaryWriter(stream, Encoding.UTF8, true))
        {
            headerToWrite.Write(binaryWriter);
        }

        // rewind before reading back what was just written
        stream.Position = 0;
        observedHeader  = CacheHeader.Read(stream);
    }

    Assert.NotNull(observedHeader);
    Assert.Equal(expectedTranscriptSource, observedHeader.Source);
    Assert.Equal(expectedCreationTimeTicks, observedHeader.CreationTimeTicks);
    Assert.Equal(expectedAssembly, observedHeader.Assembly);
    Assert.Equal(expectedVepVersion, observedHeader.Custom.VepVersion);
}
/// <summary>
/// Builds the fixtures shared by the tests in this class: the reference-index to
/// chromosome lookup, the expected cache header, and the expected cache data
/// (one gene, three transcript regions, two miRNA intervals, one peptide
/// sequence and two regulatory regions, all placed on chr3).
/// </summary>
public TranscriptCacheReaderTests()
{
    // number of reference sequences handed to ToIntervalArrays (chr1..chr3)
    const int numRefSeqs = 3;

    var chr1 = new Chromosome("chr1", "1", 0);
    var chr2 = new Chromosome("chr2", "2", 1);
    var chr3 = new Chromosome("chr3", "3", 2);

    // populate a local first so the field is only assigned once fully built
    var refIndexToChromosome = new Dictionary<ushort, IChromosome>();
    foreach (var chromosome in new[] { chr1, chr2, chr3 })
    {
        refIndexToChromosome[chromosome.Index] = chromosome;
    }

    _refIndexToChromosome = refIndexToChromosome;

    const GenomeAssembly genomeAssembly = GenomeAssembly.GRCh38;
    var baseHeader   = new Header("test", 2, 3, Source.BothRefSeqAndEnsembl, 4, genomeAssembly);
    var customHeader = new TranscriptCacheCustomHeader(1, 2);
    _expectedHeader  = new CacheHeader(baseHeader, customHeader);

    var transcriptRegions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon,   1, 100, 199, 300, 399),
        new TranscriptRegion(TranscriptRegionType.Intron, 1, 200, 299, 399, 400),
        new TranscriptRegion(TranscriptRegionType.Exon,   2, 300, 399, 400, 499)
    };

    var mirnas = new IInterval[]
    {
        new Interval(100, 200),
        new Interval(300, 400)
    };

    var peptideSeqs = new[] { "MASE*" };

    var genes = new IGene[]
    {
        new Gene(chr3, 100, 200, true, "TP53", 300, CompactId.Convert("7157"),
            CompactId.Convert("ENSG00000141510"))
    };

    var regulatoryRegions = new IRegulatoryRegion[]
    {
        new RegulatoryRegion(chr3, 1200, 1300, CompactId.Convert("123"), RegulatoryRegionType.enhancer),
        new RegulatoryRegion(chr3, 1250, 1450, CompactId.Convert("456"), RegulatoryRegionType.enhancer)
    };

    var regulatoryRegionIntervalArrays = regulatoryRegions.ToIntervalArrays(numRefSeqs);

    var transcripts              = GetTranscripts(chr3, genes, transcriptRegions, mirnas);
    var transcriptIntervalArrays = transcripts.ToIntervalArrays(numRefSeqs);

    _expectedCacheData = new TranscriptCacheData(
        _expectedHeader,
        genes,
        transcriptRegions,
        mirnas,
        peptideSeqs,
        transcriptIntervalArrays,
        regulatoryRegionIntervalArrays);
}
/// <summary>
/// Assembles a small transcript cache (one gene, three transcript regions, two
/// miRNA intervals, one peptide sequence, two regulatory regions on
/// <c>ChromosomeUtilities.Chr3</c>) and writes it to a memory stream with a
/// <see cref="TranscriptCacheWriter"/>.
/// </summary>
/// <returns>The memory stream holding the written cache, rewound to position 0.</returns>
private static Stream GetCacheStream()
{
    // number of reference sequences handed to ToIntervalArrays
    const int numRefSeqs = 3;
    const GenomeAssembly genomeAssembly = GenomeAssembly.GRCh38;

    var baseHeader   = new Header("test", 2, 3, Source.BothRefSeqAndEnsembl, 4, genomeAssembly);
    var customHeader = new TranscriptCacheCustomHeader(1, 2);
    var cacheHeader  = new CacheHeader(baseHeader, customHeader);

    var transcriptRegions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon,   1, 100, 199, 300, 399),
        new TranscriptRegion(TranscriptRegionType.Intron, 1, 200, 299, 399, 400),
        new TranscriptRegion(TranscriptRegionType.Exon,   2, 300, 399, 400, 499)
    };

    var mirnas = new IInterval[]
    {
        new Interval(100, 200),
        new Interval(300, 400)
    };

    var peptideSeqs = new[] { "MASE*" };

    var genes = new IGene[]
    {
        new Gene(ChromosomeUtilities.Chr3, 100, 200, true, "TP53", 300, CompactId.Convert("7157"),
            CompactId.Convert("ENSG00000141510"))
    };

    var regulatoryRegions = new IRegulatoryRegion[]
    {
        new RegulatoryRegion(ChromosomeUtilities.Chr3, 1200, 1300, CompactId.Convert("123"),
            RegulatoryRegionType.enhancer),
        new RegulatoryRegion(ChromosomeUtilities.Chr3, 1250, 1450, CompactId.Convert("456"),
            RegulatoryRegionType.enhancer)
    };

    var regulatoryRegionIntervalArrays = regulatoryRegions.ToIntervalArrays(numRefSeqs);

    var transcripts              = GetTranscripts(ChromosomeUtilities.Chr3, genes, transcriptRegions, mirnas);
    var transcriptIntervalArrays = transcripts.ToIntervalArrays(numRefSeqs);

    var cacheData = new TranscriptCacheData(
        cacheHeader,
        genes,
        transcriptRegions,
        mirnas,
        peptideSeqs,
        transcriptIntervalArrays,
        regulatoryRegionIntervalArrays);

    // The stream is intentionally not disposed here: it is handed back to the caller.
    var cacheStream = new MemoryStream();

    using (var cacheWriter = new TranscriptCacheWriter(cacheStream, cacheHeader, true))
    {
        cacheWriter.Write(cacheData);
    }

    // rewind so the caller can read from the beginning
    cacheStream.Position = 0;
    return cacheStream;
}
/// <summary>
/// Assigns UGA genes to the supplied transcripts, converts the transcripts and
/// regulatory regions into interval arrays, builds a cache header from this
/// instance's source, genome assembly and VEP settings, and wraps everything
/// in a <see cref="TranscriptCacheStaging"/>.
/// </summary>
/// <param name="mutableTranscripts">Transcripts to receive UGA genes and be converted.</param>
/// <param name="regulatoryRegions">Regulatory regions to include in the cache.</param>
/// <param name="geneForest">Interval forest of UGA genes used for the assignment step.</param>
/// <param name="numRefSeqs">Value forwarded to both <c>ToIntervalArrays</c> calls.</param>
/// <returns>The staging object produced by <c>TranscriptCacheStaging.GetStaging</c>.</returns>
public TranscriptCacheStaging CreateTranscriptCache(MutableTranscript[] mutableTranscripts,
    IEnumerable<IRegulatoryRegion> regulatoryRegions, IIntervalForest<UgaGene> geneForest, int numRefSeqs)
{
    Logger.Write("- assigning UGA genes to transcripts... ");
    AssignUgaGenesToTranscripts(mutableTranscripts, geneForest);
    Logger.WriteLine("finished.");

    // conversion happens only after gene assignment has completed
    var transcripts          = mutableTranscripts.ToTranscripts();
    var transcriptArrays     = transcripts.ToIntervalArrays(numRefSeqs);
    var regulatoryArrays     = regulatoryRegions.ToIntervalArrays(numRefSeqs);

    var vepHeader   = new TranscriptCacheCustomHeader(_vepVersion, _vepReleaseTicks);
    var baseHeader  = HeaderUtilities.GetHeader(_source, _genomeAssembly);
    var cacheHeader = new CacheHeader(baseHeader, vepHeader);

    return TranscriptCacheStaging.GetStaging(cacheHeader, transcriptArrays, regulatoryArrays);
}
/// <summary>
/// Opens the cache file at <paramref name="cachePath"/>, decompresses it through a
/// Zstandard <see cref="BlockStream"/>, and extracts the schema version, data
/// version and VEP version from its header.
/// </summary>
/// <param name="cachePath">Path of the cache file to inspect.</param>
/// <returns>A tuple of (schema version, data version, VEP version).</returns>
/// <exception cref="InvalidFileFormatException">
/// Thrown when the header read from the file is not a <see cref="CacheHeader"/> or
/// its custom header is not a <see cref="TranscriptCacheCustomHeader"/>.
/// </exception>
private static (ushort Schema, ushort Data, ushort Vep) GetHeaderInformation(string cachePath)
{
    CacheHeader cacheHeader;
    TranscriptCacheCustomHeader vepHeader;

    using (var fileStream = FileUtilities.GetReadStream(cachePath))
    using (var blockStream = new BlockStream(new Zstandard(), fileStream, CompressionMode.Decompress))
    {
        cacheHeader = blockStream.ReadHeader(CacheHeader.Read, TranscriptCacheCustomHeader.Read) as CacheHeader;

        // stays null when the header itself could not be interpreted as a CacheHeader
        vepHeader = cacheHeader?.CustomHeader as TranscriptCacheCustomHeader;
    }

    var parsedSuccessfully = cacheHeader != null && vepHeader != null;

    if (!parsedSuccessfully)
    {
        throw new InvalidFileFormatException($"Could not parse the header information correctly for {cachePath}");
    }

    return (cacheHeader.SchemaVersion, cacheHeader.DataVersion, vepHeader.VepVersion);
}