Esempio n. 1
0
        /// <summary>
        /// Writes a <c>CacheHeader</c> to an in-memory stream, reads it back from the
        /// start of that stream, and checks that the transcript source, creation time,
        /// genome assembly and VEP version are the values that were written.
        /// </summary>
        public void CacheHeader_EndToEnd()
        {
            const ushort         vepVersion        = ushort.MaxValue;
            const GenomeAssembly assembly          = GenomeAssembly.hg19;
            const long           creationTimeTicks = long.MaxValue;
            const Source         transcriptSource  = Source.BothRefSeqAndEnsembl;

            // the base header is built first, then the custom header, then both are combined
            var writtenHeader = new CacheHeader(
                new Header("VEP", 1, 2, transcriptSource, creationTimeTicks, assembly),
                new TranscriptCacheCustomHeader(vepVersion, 0));

            CacheHeader roundTrippedHeader;

            using (var stream = new MemoryStream())
            {
                // leaveOpen = true: disposing the writer must not close the memory stream,
                // because the same stream is rewound and read from immediately afterwards
                using (var binaryWriter = new BinaryWriter(stream, Encoding.UTF8, true))
                {
                    writtenHeader.Write(binaryWriter);
                }

                stream.Position    = 0;
                roundTrippedHeader = CacheHeader.Read(stream);
            }

            Assert.NotNull(roundTrippedHeader);
            Assert.Equal(transcriptSource, roundTrippedHeader.Source);
            Assert.Equal(creationTimeTicks, roundTrippedHeader.CreationTimeTicks);
            Assert.Equal(assembly, roundTrippedHeader.Assembly);
            Assert.Equal(vepVersion, roundTrippedHeader.Custom.VepVersion);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the fixtures stored on this test class: the reference-index to
        /// chromosome dictionary, the expected cache header, and the expected
        /// transcript cache data (genes, transcript regions, miRNAs, peptide
        /// sequences, and the transcript / regulatory region interval arrays).
        /// </summary>
        public TranscriptCacheReaderTests()
        {
            // both interval-array conversions below are given the same reference sequence count
            const int numRefSeqs = 3;

            var chromosome1 = new Chromosome("chr1", "1", 0);
            var chromosome2 = new Chromosome("chr2", "2", 1);
            var chromosome3 = new Chromosome("chr3", "3", 2);

            _refIndexToChromosome = new Dictionary <ushort, IChromosome>
            {
                [chromosome1.Index] = chromosome1,
                [chromosome2.Index] = chromosome2,
                [chromosome3.Index] = chromosome3
            };

            var header       = new Header("test", 2, 3, Source.BothRefSeqAndEnsembl, 4, GenomeAssembly.GRCh38);
            var vepHeader    = new TranscriptCacheCustomHeader(1, 2);
            _expectedHeader  = new CacheHeader(header, vepHeader);

            var regions = new ITranscriptRegion[]
            {
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 100, 199, 300, 399),
                new TranscriptRegion(TranscriptRegionType.Intron, 1, 200, 299, 399, 400),
                new TranscriptRegion(TranscriptRegionType.Exon, 2, 300, 399, 400, 499)
            };

            var microRnas = new IInterval[]
            {
                new Interval(100, 200),
                new Interval(300, 400)
            };

            var peptides = new[] { "MASE*" };

            var geneArray = new IGene[]
            {
                new Gene(chromosome3, 100, 200, true, "TP53", 300, CompactId.Convert("7157"),
                         CompactId.Convert("ENSG00000141510"))
            };

            // two enhancer regions on chr3 whose coordinates overlap (1200-1300 and 1250-1450)
            var enhancers = new IRegulatoryRegion[]
            {
                new RegulatoryRegion(chromosome3, 1200, 1300, CompactId.Convert("123"), RegulatoryRegionType.enhancer),
                new RegulatoryRegion(chromosome3, 1250, 1450, CompactId.Convert("456"), RegulatoryRegionType.enhancer)
            };

            var regulatoryArrays = enhancers.ToIntervalArrays(numRefSeqs);
            var transcriptArrays = GetTranscripts(chromosome3, geneArray, regions, microRnas).ToIntervalArrays(numRefSeqs);

            _expectedCacheData = new TranscriptCacheData(_expectedHeader, geneArray, regions, microRnas, peptides,
                                                         transcriptArrays, regulatoryArrays);
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a small transcript cache (one gene, three transcript regions, two
        /// miRNAs, one peptide sequence and two regulatory regions on chr3), writes it
        /// to a new <see cref="MemoryStream"/> and returns that stream positioned at 0.
        /// </summary>
        /// <returns>
        /// The memory stream holding the written cache. It is not disposed here; the
        /// caller receives it open.
        /// </returns>
        private static Stream GetCacheStream()
        {
            // both interval-array conversions below are given the same reference sequence count
            const int numRefSeqs = 3;

            var header      = new Header("test", 2, 3, Source.BothRefSeqAndEnsembl, 4, GenomeAssembly.GRCh38);
            var vepHeader   = new TranscriptCacheCustomHeader(1, 2);
            var cacheHeader = new CacheHeader(header, vepHeader);

            var regions = new ITranscriptRegion[]
            {
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 100, 199, 300, 399),
                new TranscriptRegion(TranscriptRegionType.Intron, 1, 200, 299, 399, 400),
                new TranscriptRegion(TranscriptRegionType.Exon, 2, 300, 399, 400, 499)
            };

            var microRnas = new IInterval[]
            {
                new Interval(100, 200),
                new Interval(300, 400)
            };

            var peptides = new[] { "MASE*" };

            var geneArray = new IGene[]
            {
                new Gene(ChromosomeUtilities.Chr3, 100, 200, true, "TP53", 300, CompactId.Convert("7157"),
                         CompactId.Convert("ENSG00000141510"))
            };

            var enhancers = new IRegulatoryRegion[]
            {
                new RegulatoryRegion(ChromosomeUtilities.Chr3, 1200, 1300, CompactId.Convert("123"), RegulatoryRegionType.enhancer),
                new RegulatoryRegion(ChromosomeUtilities.Chr3, 1250, 1450, CompactId.Convert("456"), RegulatoryRegionType.enhancer)
            };

            var regulatoryArrays = enhancers.ToIntervalArrays(numRefSeqs);
            var transcriptArrays = GetTranscripts(ChromosomeUtilities.Chr3, geneArray, regions, microRnas).ToIntervalArrays(numRefSeqs);

            var cacheData = new TranscriptCacheData(cacheHeader, geneArray, regions, microRnas, peptides,
                                                    transcriptArrays, regulatoryArrays);

            var memoryStream = new MemoryStream();

            // NOTE(review): the third argument is presumably a leave-open flag, since the
            // stream is rewound and returned after the writer is disposed - confirm
            using (var cacheWriter = new TranscriptCacheWriter(memoryStream, cacheHeader, true))
            {
                cacheWriter.Write(cacheData);
            }

            // rewind so the caller starts reading at the beginning of the cache
            memoryStream.Position = 0;
            return memoryStream;
        }
Esempio n. 4
0
        /// <summary>
        /// Assigns UGA genes to the supplied transcripts, converts the transcripts and
        /// regulatory regions into interval arrays, and wraps them together with a
        /// newly built cache header in a <c>TranscriptCacheStaging</c> object.
        /// </summary>
        /// <param name="mutableTranscripts">Transcripts passed to the gene assignment step and then converted.</param>
        /// <param name="regulatoryRegions">Regulatory regions to convert into interval arrays.</param>
        /// <param name="geneForest">Interval forest of UGA genes used for the assignment step.</param>
        /// <param name="numRefSeqs">Value passed to both interval-array conversions.</param>
        /// <returns>The staging object produced by <c>TranscriptCacheStaging.GetStaging</c>.</returns>
        public TranscriptCacheStaging CreateTranscriptCache(MutableTranscript[] mutableTranscripts,
                                                            IEnumerable <IRegulatoryRegion> regulatoryRegions, IIntervalForest <UgaGene> geneForest, int numRefSeqs)
        {
            // progress message is split across Write / WriteLine around the assignment step
            Logger.Write("- assigning UGA genes to transcripts... ");
            AssignUgaGenesToTranscripts(mutableTranscripts, geneForest);
            Logger.WriteLine("finished.");

            // transcripts are converted only after gene assignment has run on them
            var transcripts      = mutableTranscripts.ToTranscripts();
            var transcriptArrays = transcripts.ToIntervalArrays(numRefSeqs);
            var regulatoryArrays = regulatoryRegions.ToIntervalArrays(numRefSeqs);

            var vepHeader   = new TranscriptCacheCustomHeader(_vepVersion, _vepReleaseTicks);
            var baseHeader  = HeaderUtilities.GetHeader(_source, _genomeAssembly);
            var cacheHeader = new CacheHeader(baseHeader, vepHeader);

            return TranscriptCacheStaging.GetStaging(cacheHeader, transcriptArrays, regulatoryArrays);
        }
Esempio n. 5
0
        /// <summary>
        /// Opens the cache file at <paramref name="cachePath"/>, reads its header through a
        /// Zstandard-decompressing block stream, and returns the schema version, data
        /// version and VEP version found there.
        /// </summary>
        /// <param name="cachePath">Path handed to <c>FileUtilities.GetReadStream</c>.</param>
        /// <returns>A tuple of (schema version, data version, VEP version).</returns>
        /// <exception cref="InvalidFileFormatException">
        /// Thrown when the header read from the stream is not a <c>CacheHeader</c>, or its
        /// custom header is not a <c>TranscriptCacheCustomHeader</c>.
        /// </exception>
        private static (ushort Schema, ushort Data, ushort Vep) GetHeaderInformation(string cachePath)
        {
            CacheHeader cacheHeader;
            TranscriptCacheCustomHeader vepHeader;

            using (var fileStream = FileUtilities.GetReadStream(cachePath))
            using (var zstdStream = new BlockStream(new Zstandard(), fileStream, CompressionMode.Decompress))
            {
                cacheHeader = zstdStream.ReadHeader(CacheHeader.Read, TranscriptCacheCustomHeader.Read) as CacheHeader;

                // the custom header can only be inspected when the outer header had the expected type
                vepHeader = cacheHeader != null
                    ? cacheHeader.CustomHeader as TranscriptCacheCustomHeader
                    : null;
            }

            // validation happens after both streams have been disposed
            if (cacheHeader != null && vepHeader != null)
            {
                return (cacheHeader.SchemaVersion, cacheHeader.DataVersion, vepHeader.VepVersion);
            }

            throw new InvalidFileFormatException($"Could not parse the header information correctly for {cachePath}");
        }