예제 #1
0
        /// <summary>
        /// Builds the index by copying the remaining content of <paramref name="stream"/> into memory
        /// and parsing the header fields followed by the per-chromosome block lists.
        /// </summary>
        /// <param name="stream">Stream to read the serialized index from (read from its current position).</param>
        public NsaIndex(Stream stream)
        {
            using (var buffered = new MemoryStream())
            using (var reader = new ExtendedBinaryReader(buffered))
            {
                // pull everything into memory first, then rewind so parsing starts at the first byte
                stream.CopyTo(buffered);
                buffered.Position = 0;

                // header fields, in serialization order
                Assembly      = (GenomeAssembly)reader.ReadByte();
                Version       = DataSourceVersion.Read(reader);
                JsonKey       = reader.ReadAsciiString();
                MatchByAllele = reader.ReadBoolean();
                IsArray       = reader.ReadBoolean();
                SchemaVersion = reader.ReadOptInt32();
                IsPositional  = reader.ReadBoolean();

                // per-chromosome block lists
                int numChromosomes = reader.ReadOptInt32();
                _chromBlocks = new Dictionary<ushort, List<NsaIndexBlock>>(numChromosomes);

                for (int chrom = 0; chrom < numChromosomes; chrom++)
                {
                    ushort chromIndex = reader.ReadOptUInt16();
                    int    numBlocks  = reader.ReadOptInt32();

                    var blocks = new List<NsaIndexBlock>(numBlocks);
                    _chromBlocks[chromIndex] = blocks;

                    for (int remaining = numBlocks; remaining > 0; remaining--)
                    {
                        blocks.Add(new NsaIndexBlock(reader));
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Builds the index by loading the remaining content of <paramref name="stream"/> into memory
        /// and parsing the header fields followed by the per-chromosome chunk lists.
        /// </summary>
        /// <param name="stream">Stream to read the serialized index from (read from its current position).</param>
        public ChunkedIndex(Stream stream)
        {
            using (var memStream = new MemoryStream())
            {
                // A single Stream.Read call may return fewer bytes than requested even before
                // end-of-stream, and a fixed 1 MiB buffer truncates larger indexes. CopyTo keeps
                // reading until the source is exhausted, so the whole index is always available.
                stream.CopyTo(memStream);
                memStream.Position = 0;

                using (var memReader = new ExtendedBinaryReader(memStream))
                {
                    // header fields, in serialization order
                    Assembly      = (GenomeAssembly)memReader.ReadByte();
                    Version       = DataSourceVersion.Read(memReader);
                    JsonKey       = memReader.ReadAsciiString();
                    MatchByAllele = memReader.ReadBoolean();
                    IsArray       = memReader.ReadBoolean();
                    SchemaVersion = memReader.ReadOptInt32();
                    IsPositional  = memReader.ReadBoolean();

                    // per-chromosome chunk lists
                    var chromCount = memReader.ReadOptInt32();
                    _chromChunks = new Dictionary<ushort, List<Chunk>>(chromCount);

                    for (var i = 0; i < chromCount; i++)
                    {
                        var chromIndex = memReader.ReadOptUInt16();
                        var chunkCount = memReader.ReadOptInt32();

                        // hold the list locally to avoid a dictionary lookup per chunk
                        var chunks = new List<Chunk>(chunkCount);
                        _chromChunks[chromIndex] = chunks;

                        for (var j = 0; j < chunkCount; j++)
                        {
                            chunks.Add(new Chunk(memReader));
                        }
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Deserializes a chromosome from <paramref name="reader"/>. The fields are consumed in this order:
        /// UCSC name, Ensembl name, RefSeq accession, GenBank accession, length, reference index.
        /// </summary>
        /// <param name="reader">Reader supplying the serialized chromosome.</param>
        /// <returns>A new <see cref="Chromosome"/> populated with the values that were read.</returns>
        public static IChromosome Read(ExtendedBinaryReader reader)
        {
            // names and accessions
            var ucsc    = reader.ReadAsciiString();
            var ensembl = reader.ReadAsciiString();
            var refSeq  = reader.ReadAsciiString();
            var genBank = reader.ReadAsciiString();

            // numeric fields
            int    chromLength = reader.ReadOptInt32();
            ushort index       = reader.ReadOptUInt16();

            return new Chromosome(ucsc, ensembl, refSeq, genBank, chromLength, index);
        }
예제 #4
0
        /// <summary>
        /// Deserializes a supplementary interval from <paramref name="reader"/>: chromosome names and index,
        /// then start, end and the JSON string.
        /// </summary>
        /// <param name="reader">Reader supplying the serialized interval.</param>
        public SuppInterval(ExtendedBinaryReader reader)
        {
            // the Ensembl name is read before the UCSC name, while the Chromosome
            // constructor below receives the UCSC name first
            var    ensembl = reader.ReadAsciiString();
            var    ucsc    = reader.ReadAsciiString();
            ushort index   = reader.ReadOptUInt16();
            Chromosome     = new Chromosome(ucsc, ensembl, index);

            Start = reader.ReadOptInt32();
            End   = reader.ReadOptInt32();

            // unlike the names above, this is read with ReadString rather than ReadAsciiString
            _jsonString = reader.ReadString();
        }
예제 #5
0
        /// <summary>
        /// Deserializes a transcript region from <paramref name="reader"/>. The fields are consumed in this order:
        /// type (one byte), id, genomic start, genomic end, cDNA start, cDNA end.
        /// </summary>
        /// <param name="reader">Reader supplying the serialized transcript region.</param>
        /// <returns>A new <see cref="TranscriptRegion"/> populated with the values that were read.</returns>
        public static ITranscriptRegion Read(ExtendedBinaryReader reader)
        {
            var    regionType = (TranscriptRegionType)reader.ReadByte();
            ushort regionId   = reader.ReadOptUInt16();

            // genomic coordinates
            int genomicBegin = reader.ReadOptInt32();
            int genomicStop  = reader.ReadOptInt32();

            // cDNA coordinates
            int cdnaBegin = reader.ReadOptInt32();
            int cdnaStop  = reader.ReadOptInt32();

            return new TranscriptRegion(regionType, regionId, genomicBegin, genomicStop, cdnaBegin, cdnaStop);
        }
예제 #6
0
        /// <summary>
        /// Reads a ushort back through <see cref="ExtendedBinaryReader.ReadOptUInt16"/> from the stream
        /// that GetMemoryStream produces for <paramref name="expectedShort"/>.
        /// </summary>
        /// <param name="expectedShort">Value handed to GetMemoryStream.</param>
        /// <returns>The value the reader decoded from that stream.</returns>
        private static ushort GetObservedShort(ushort expectedShort)
        {
            using (var stream = GetMemoryStream(expectedShort))
            using (var binaryReader = new ExtendedBinaryReader(stream))
            {
                return binaryReader.ReadOptUInt16();
            }
        }
예제 #7
0
        /// <summary>
        /// Deserializes a supplementary interval from <paramref name="reader"/>: chromosome names and index,
        /// then start, end and the JSON string.
        /// </summary>
        /// <param name="reader">Reader supplying the serialized interval.</param>
        /// <returns>A new <see cref="SuppInterval"/> built from the values that were read.</returns>
        public static SuppInterval Read(ExtendedBinaryReader reader)
        {
            // the Ensembl name is read before the UCSC name
            var    ensembl = reader.ReadAsciiString();
            var    ucsc    = reader.ReadAsciiString();
            ushort index   = reader.ReadOptUInt16();

            // accessions are not serialized here, so they are passed as null and the length as 1
            var chrom = new Chromosome(ucsc, ensembl, null, null, 1, index);

            var intervalStart = reader.ReadOptInt32();
            var intervalEnd   = reader.ReadOptInt32();
            var json          = reader.ReadString();

            return new SuppInterval(chrom, intervalStart, intervalEnd, json);
        }
예제 #8
0
        /// <summary>
        /// Deserializes a transcript from <paramref name="reader"/>, resolving indices against the supplied caches.
        /// </summary>
        /// <param name="reader">Reader supplying the serialized transcript.</param>
        /// <param name="chromosomeIndexDictionary">Lookup from reference index to chromosome.</param>
        /// <param name="cacheGenes">Gene cache indexed by the serialized gene index.</param>
        /// <param name="cacheTranscriptRegions">Transcript-region cache passed to ReadIndices.</param>
        /// <param name="cacheMirnas">miRNA cache passed to ReadIndices.</param>
        /// <param name="cachePeptideSeqs">Peptide-sequence cache passed to the translation reader.</param>
        /// <returns>A new <see cref="Transcript"/> built from the values that were read.</returns>
        public static ITranscript Read(ExtendedBinaryReader reader,
                                       IDictionary <ushort, IChromosome> chromosomeIndexDictionary, IGene[] cacheGenes,
                                       ITranscriptRegion[] cacheTranscriptRegions, IInterval[] cacheMirnas, string[] cachePeptideSeqs)
        {
            // NOTE: every read below must stay in this exact order; it mirrors the serialized layout.

            // transcript coordinates and identifier
            ushort refIndex     = reader.ReadOptUInt16();
            int    begin        = reader.ReadOptInt32();
            int    finish       = reader.ReadOptInt32();
            var    transcriptId = CompactId.Read(reader);

            // gene (stored as an index into the gene cache)
            var gene = cacheGenes[reader.ReadOptInt32()];

            // flags describing which optional sections follow
            var flags = EncodedTranscriptData.Read(reader);

            // transcript regions
            var    regions   = flags.HasTranscriptRegions ? ReadIndices(reader, cacheTranscriptRegions) : null;
            ushort exonCount = reader.ReadOptUInt16();

            // protein function predictions; -1 means the index is absent
            int siftIndex = -1;
            if (flags.HasSift)
            {
                siftIndex = reader.ReadOptInt32();
            }

            int polyphenIndex = -1;
            if (flags.HasPolyPhen)
            {
                polyphenIndex = reader.ReadOptInt32();
            }

            // translation
            var translation = flags.HasTranslation ? DataStructures.Translation.Read(reader, cachePeptideSeqs) : null;

            // optional attributes
            var mirnas          = flags.HasMirnas          ? ReadIndices(reader, cacheMirnas)         : null;
            var rnaEdits        = flags.HasRnaEdits        ? ReadItems(reader, RnaEdit.Read)          : null;
            var selenocysteines = flags.HasSelenocysteines ? ReadItems(reader, x => x.ReadOptInt32()) : null;

            // the chromosome lookup happens only after all reads have completed
            return new Transcript(
                chromosomeIndexDictionary[refIndex], begin, finish, transcriptId, translation,
                flags.BioType, gene, ExonUtilities.GetTotalExonLength(regions), flags.StartExonPhase,
                flags.IsCanonical, regions, exonCount, mirnas, siftIndex, polyphenIndex,
                flags.TranscriptSource, flags.CdsStartNotFound, flags.CdsEndNotFound, selenocysteines, rnaEdits);
        }
예제 #9
0
        /// <summary>
        /// Loads the whole compressed file from <paramref name="stream"/>, decompresses it with Zstandard
        /// and reads the header (version, JSON key, array flag, schema version).
        /// </summary>
        /// <param name="stream">Stream to read the compressed file from (read from its current position).</param>
        /// <exception cref="UserErrorException">
        /// Thrown when the schema version in the file differs from <c>SaCommon.SchemaVersion</c>.
        /// </exception>
        public NgaReader(Stream stream)
        {
            _nsaStream = stream;

            // Read the whole file. A single Stream.Read call may return fewer bytes than requested
            // even before end-of-stream, and a fixed 2 MiB buffer would silently truncate larger
            // files, so buffer the stream with CopyTo instead.
            byte[] compressedBytes;
            using (var compressedStream = new MemoryStream())
            {
                _nsaStream.CopyTo(compressedStream);
                compressedBytes = compressedStream.ToArray();
            }

            var compressedSize    = compressedBytes.Length;
            var decompressedBytes = new byte[20 * 1024 * 1024];

            var zstd             = new Zstandard();
            var decompressedSize = zstd.Decompress(compressedBytes, compressedSize, decompressedBytes, decompressedBytes.Length);

            // the reader is kept in a field, so it is not disposed here
            _memStream = new MemoryStream(decompressedBytes, 0, decompressedSize);
            _reader    = new ExtendedBinaryReader(_memStream);

            Version  = DataSourceVersion.Read(_reader);
            JsonKey  = _reader.ReadAsciiString();
            _isArray = _reader.ReadBoolean();
            ushort schemaVersion = _reader.ReadOptUInt16();

            if (schemaVersion != SaCommon.SchemaVersion)
            {
                throw new UserErrorException($"Expected schema version: {SaCommon.SchemaVersion}, observed: {schemaVersion} for {JsonKey}");
            }
        }