Ejemplo n.º 1
0
        /// <summary>
        /// Deserializes a <see cref="SimpleInterval"/> by reading its start and end
        /// values (in that order) from the binary reader.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized interval.</param>
        /// <returns>A new interval built from the two values that were read.</returns>
        public static SimpleInterval Read(ExtendedBinaryReader reader)
        {
            int begin  = reader.ReadOptInt32();
            int finish = reader.ReadOptInt32();
            return new SimpleInterval(begin, finish);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Builds a node from its serialized form: the start, the end (stored as an
 /// offset relative to the start) and the file location.
 /// </summary>
 /// <param name="reader">Reader positioned at the serialized node.</param>
 public JasixNode(ExtendedBinaryReader reader)
 {
     _start = reader.ReadOptInt32();

     // the end is persisted as a distance from the start to save space
     int endOffset = reader.ReadOptInt32();
     _end = _start + endOffset;

     FileLocation = reader.ReadOptInt64();
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads a gene annotation reader from the stream. The header is read first,
        /// then the remainder is decompressed through a <c>BlockStream</c> and parsed
        /// into a map from gene symbol to its list of JSON strings.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of the header.</param>
        /// <returns>A reader populated with the header fields and the per-gene entries.</returns>
        public static NgaReader Read(Stream stream)
        {
            (IDataSourceVersion version, string jsonKey, bool isArray) = ReadHeader(stream);

            Dictionary<string, List<string>> jsonByGene;

            using (var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress))
            {
                using (var reader = new ExtendedBinaryReader(blockStream))
                {
                    int numGenes = reader.ReadOptInt32();
                    jsonByGene = new Dictionary<string, List<string>>(numGenes);

                    for (var geneIndex = 0; geneIndex < numGenes; geneIndex++)
                    {
                        string symbol      = reader.ReadAsciiString();
                        int    numJson     = reader.ReadOptInt32();
                        var    jsonStrings = new List<string>(numJson);

                        while (jsonStrings.Count < numJson)
                        {
                            jsonStrings.Add(reader.ReadString());
                        }

                        // indexer assignment: a repeated symbol replaces the earlier entry
                        jsonByGene[symbol] = jsonStrings;
                    }
                }
            }

            return new NgaReader(version, jsonKey, isArray, jsonByGene);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Deserializes an index block by reading, in order, its start, end,
 /// file position and length.
 /// </summary>
 /// <param name="reader">Reader positioned at the serialized block.</param>
 public NsaIndexBlock(ExtendedBinaryReader reader)
 {
     var begin    = reader.ReadOptInt32();
     var finish   = reader.ReadOptInt32();
     var position = reader.ReadOptInt64();
     var size     = reader.ReadOptInt32();

     Start        = begin;
     End          = finish;
     FilePosition = position;
     Length       = size;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Deserializes a transcript from the binary reader. Genes, introns, miRNAs and
        /// peptide sequences are not stored inline; they are resolved against the supplied
        /// cache arrays.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized transcript.</param>
        /// <param name="cacheGenes">Gene cache indexed by the stored gene index.</param>
        /// <param name="cacheIntrons">Intron cache used to resolve the stored intron indices.</param>
        /// <param name="cacheMirnas">miRNA cache used to resolve the stored miRNA indices.</param>
        /// <param name="cachePeptideSeqs">Peptide sequences handed to <c>Translation.Read</c>.</param>
        /// <returns>The reconstructed transcript.</returns>
        public static Transcript Read(ExtendedBinaryReader reader, Gene[] cacheGenes, SimpleInterval[] cacheIntrons,
                                      SimpleInterval[] cacheMirnas, string[] cachePeptideSeqs)
        {
            // core transcript fields
            var refIndex        = reader.ReadUInt16();
            var transcriptStart = reader.ReadOptInt32();
            var transcriptEnd   = reader.ReadOptInt32();
            var transcriptId    = CompactId.Read(reader);

            // the gene is stored as an index into the gene cache
            var owningGene = cacheGenes[reader.ReadOptInt32()];

            // packed flags/values that decide which optional sections follow
            var encoded = new EncodedTranscriptData(reader.ReadUInt16(), reader.ReadByte());

            // optional sections must be consumed in exactly this order
            var introns  = encoded.HasIntrons  ? ReadIndices(reader, cacheIntrons) : null;
            var cdnaMaps = encoded.HasCdnaMaps ? ReadCdnaMaps(reader)              : null;

            // protein function prediction indices; -1 when absent
            int siftIndex = -1;
            if (encoded.HasSift)
            {
                siftIndex = reader.ReadOptInt32();
            }

            int polyphenIndex = -1;
            if (encoded.HasPolyPhen)
            {
                polyphenIndex = reader.ReadOptInt32();
            }

            var translation = encoded.HasTranslation ? Translation.Read(reader, cachePeptideSeqs) : null;
            var mirnas      = encoded.HasMirnas      ? ReadIndices(reader, cacheMirnas)           : null;

            return new Transcript(refIndex, transcriptStart, transcriptEnd, transcriptId, encoded.Version,
                                  translation, encoded.BioType, owningGene,
                                  TranscriptUtilities.GetTotalExonLength(cdnaMaps), encoded.StartExonPhase,
                                  encoded.IsCanonical, introns, mirnas, cdnaMaps, siftIndex, polyphenIndex,
                                  encoded.TranscriptSource);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads the sequence offset followed by the number of bases.
        /// </summary>
        /// <param name="reader">Reader positioned at the metadata.</param>
        /// <returns>The two values as a named tuple, in the order they were read.</returns>
        private static (int SequenceOffset, int NumBases) GetMetadata(ExtendedBinaryReader reader)
        {
            // tuple elements are evaluated left to right, so the offset is read first
            return (reader.ReadOptInt32(), reader.ReadOptInt32());
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Loads the index from the stream. The remaining stream content is copied into
        /// memory first, then the header fields and the per-chromosome block lists are parsed.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of the index.</param>
        public NsaIndex(Stream stream)
        {
            using (var memStream = new MemoryStream())
            {
                using (var memReader = new ExtendedBinaryReader(memStream))
                {
                    // buffer everything that is left in the source stream, then rewind
                    stream.CopyTo(memStream);
                    memStream.Position = 0;

                    // header fields, in on-disk order
                    Assembly      = (GenomeAssembly)memReader.ReadByte();
                    Version       = DataSourceVersion.Read(memReader);
                    JsonKey       = memReader.ReadAsciiString();
                    MatchByAllele = memReader.ReadBoolean();
                    IsArray       = memReader.ReadBoolean();
                    SchemaVersion = memReader.ReadOptInt32();
                    IsPositional  = memReader.ReadBoolean();

                    var numChromosomes = memReader.ReadOptInt32();
                    _chromBlocks = new Dictionary<ushort, List<NsaIndexBlock>>(numChromosomes);

                    for (var chrom = 0; chrom < numChromosomes; chrom++)
                    {
                        var chromIndex = memReader.ReadOptUInt16();
                        var numBlocks  = memReader.ReadOptInt32();

                        var blocks = new List<NsaIndexBlock>(numBlocks);
                        _chromBlocks[chromIndex] = blocks;

                        while (blocks.Count < numBlocks)
                        {
                            blocks.Add(new NsaIndexBlock(memReader));
                        }
                    }
                }
            }
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Deserializes a chunk descriptor: start, end, file position and length,
 /// read in that order.
 /// </summary>
 /// <param name="reader">Reader positioned at the serialized chunk.</param>
 public Chunk(ExtendedBinaryReader reader)
 {
     // genomic extent of the chunk
     _start = reader.ReadOptInt32();
     _end   = reader.ReadOptInt32();

     // where the chunk lives in the data file and how long it is
     FilePosition = reader.ReadOptInt64();
     Length       = reader.ReadOptInt32();
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Loads the chunked index from the stream. Up to 1 MiB is buffered in memory and
        /// then parsed into the header fields and the per-chromosome chunk lists.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of the index.</param>
        public ChunkedIndex(Stream stream)
        {
            // NOTE(review): an index larger than this buffer would be truncated - confirm the limit is sufficient
            const int maxIndexLength = 1048576;

            var buffer      = new byte[maxIndexLength];
            var indexLength = 0;

            // Stream.Read is allowed to return fewer bytes than requested even when more
            // data is available, so keep reading until the buffer is full or EOF is hit
            while (indexLength < maxIndexLength)
            {
                int numRead = stream.Read(buffer, indexLength, maxIndexLength - indexLength);
                if (numRead == 0)
                {
                    break;
                }

                indexLength += numRead;
            }

            using (var memStream = new MemoryStream(buffer, 0, indexLength))
                using (var memReader = new ExtendedBinaryReader(memStream))
                {
                    // header fields, in on-disk order
                    Assembly      = (GenomeAssembly)memReader.ReadByte();
                    Version       = DataSourceVersion.Read(memReader);
                    JsonKey       = memReader.ReadAsciiString();
                    MatchByAllele = memReader.ReadBoolean();
                    IsArray       = memReader.ReadBoolean();
                    SchemaVersion = memReader.ReadOptInt32();
                    IsPositional  = memReader.ReadBoolean();

                    var chromCount = memReader.ReadOptInt32();
                    _chromChunks = new Dictionary <ushort, List <Chunk> >(chromCount);
                    for (var i = 0; i < chromCount; i++)
                    {
                        var chromIndex = memReader.ReadOptUInt16();
                        var chunkCount = memReader.ReadOptInt32();

                        var chunks = new List <Chunk>(chunkCount);
                        _chromChunks[chromIndex] = chunks;

                        for (var j = 0; j < chunkCount; j++)
                        {
                            chunks.Add(new Chunk(memReader));
                        }
                    }
                }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Reads an interval whose payload is a 64-bit position: begin, end and
        /// position are consumed in that order.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized interval.</param>
        /// <returns>The interval carrying the position as its value.</returns>
        private static Interval<long> ReadInterval(ExtendedBinaryReader reader)
        {
            // constructor arguments are evaluated left to right, matching the on-disk order
            return new Interval<long>(reader.ReadOptInt32(), reader.ReadOptInt32(), reader.ReadOptInt64());
        }
Ejemplo n.º 11
0
 /// <summary>
 /// Reads the block header (compressed length, first position, count) and the
 /// compressed payload into the internal buffer without decompressing it.
 /// </summary>
 /// <param name="reader">Reader positioned at the start of the block.</param>
 /// <exception cref="System.IO.EndOfStreamException">
 /// Thrown when the reader ends before the announced number of compressed bytes was read.
 /// </exception>
 public void ReadCompressedBytes(ExtendedBinaryReader reader)
 {
     _compressedLength = reader.ReadOptInt32();
     _firstPosition    = reader.ReadOptInt32();
     //_lastPosition   = reader.ReadOptInt32();
     _count = reader.ReadOptInt32();

     // Read may deliver fewer bytes than requested, so loop until the payload is complete.
     // The first call always runs so that invalid lengths are still rejected by Read itself.
     var offset = 0;
     do
     {
         int numRead = reader.Read(_compressedBlock, offset, _compressedLength - offset);
         if (numRead == 0)
         {
             if (offset < _compressedLength)
             {
                 throw new System.IO.EndOfStreamException(
                     $"Expected {_compressedLength} compressed bytes but only {offset} were available.");
             }

             break;
         }

         offset += numRead;
     } while (offset < _compressedLength);
 }
Ejemplo n.º 12
0
        /// <summary>
        /// Deserializes an RNA edit: start, end and the ASCII bases string.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized RNA edit.</param>
        /// <returns>The RNA edit built from the values that were read.</returns>
        public static IRnaEdit Read(ExtendedBinaryReader reader)
        {
            int    editStart = reader.ReadOptInt32();
            int    editEnd   = reader.ReadOptInt32();
            string editBases = reader.ReadAsciiString();
            return new RnaEdit(editStart, editEnd, editBases);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Deserializes a coding region: genomic start/end, cDNA start/end and length,
        /// read in that order.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized coding region.</param>
        /// <returns>The coding region built from the values that were read.</returns>
        public static ICodingRegion Read(ExtendedBinaryReader reader)
        {
            // genomic extent
            int gStart = reader.ReadOptInt32();
            int gEnd   = reader.ReadOptInt32();

            // cDNA extent
            int cStart = reader.ReadOptInt32();
            int cEnd   = reader.ReadOptInt32();

            int codingLength = reader.ReadOptInt32();

            return new CodingRegion(gStart, gEnd, cStart, cEnd, codingLength);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Deserializes a regulatory element. On disk the order is reference index,
        /// start, end, element type and ID; the constructor takes the ID before the type.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized regulatory element.</param>
        /// <returns>The reconstructed regulatory element.</returns>
        public static RegulatoryElement Read(ExtendedBinaryReader reader)
        {
            ushort refIndex     = reader.ReadUInt16();
            int    elementStart = reader.ReadOptInt32();
            int    elementEnd   = reader.ReadOptInt32();

            // the type byte precedes the ID in the file
            var elementType = (RegulatoryElementType)reader.ReadByte();
            var elementId   = CompactId.Read(reader);

            return new RegulatoryElement(refIndex, elementStart, elementEnd, elementId, elementType);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Reads the block header (compressed length, first position, count) and the
        /// compressed payload, then decompresses the payload into the uncompressed buffer.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the block.</param>
        /// <exception cref="System.IO.EndOfStreamException">
        /// Thrown when the reader ends before the announced number of compressed bytes was read.
        /// </exception>
        public void Read(ExtendedBinaryReader reader)
        {
            _compressedLength = reader.ReadOptInt32();
            _firstPosition    = reader.ReadOptInt32();
            //_lastPosition = reader.ReadOptInt32();
            _count = reader.ReadOptInt32();

            // Read may deliver fewer bytes than requested, so loop until the payload is complete.
            // The first call always runs so that invalid lengths are still rejected by Read itself.
            var offset = 0;
            do
            {
                int numRead = reader.Read(_compressedBlock, offset, _compressedLength - offset);
                if (numRead == 0)
                {
                    if (offset < _compressedLength)
                    {
                        throw new System.IO.EndOfStreamException(
                            $"Expected {_compressedLength} compressed bytes but only {offset} were available.");
                    }

                    break;
                }

                offset += numRead;
            } while (offset < _compressedLength);

            _uncompressedLength = _compressionAlgorithm.Decompress(_compressedBlock, _compressedLength,
                                                                   _uncompressedBlock, _uncompressedBlock.Length);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Deserializes a transcript region: type, ID, genomic start/end and
        /// cDNA start/end, read in that order.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized transcript region.</param>
        /// <returns>The reconstructed transcript region.</returns>
        public static ITranscriptRegion Read(ExtendedBinaryReader reader)
        {
            var    regionType = (TranscriptRegionType)reader.ReadByte();
            ushort regionId   = reader.ReadOptUInt16();

            // genomic extent
            int gStart = reader.ReadOptInt32();
            int gEnd   = reader.ReadOptInt32();

            // cDNA extent
            int cStart = reader.ReadOptInt32();
            int cEnd   = reader.ReadOptInt32();

            return new TranscriptRegion(regionType, regionId, gStart, gEnd, cStart, cEnd);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Deserializes a supplementary interval: chromosome (Ensembl name, UCSC name,
        /// index), start, end and the JSON string.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized interval.</param>
        public SuppInterval(ExtendedBinaryReader reader)
        {
            // the Ensembl name is stored before the UCSC name, but the Chromosome
            // constructor takes them in the opposite order
            string ensembl = reader.ReadAsciiString();
            string ucsc    = reader.ReadAsciiString();
            ushort index   = reader.ReadOptUInt16();
            Chromosome = new Chromosome(ucsc, ensembl, index);

            Start = reader.ReadOptInt32();
            End   = reader.ReadOptInt32();

            _jsonString = reader.ReadString();
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Deserializes a cDNA coordinate map: the genomic interval followed by the
        /// cDNA interval.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized coordinate map.</param>
        /// <returns>The reconstructed coordinate map.</returns>
        public static CdnaCoordinateMap Read(ExtendedBinaryReader reader)
        {
            // genomic interval first
            int gStart = reader.ReadOptInt32();
            int gEnd   = reader.ReadOptInt32();

            // then the cDNA interval
            int cStart = reader.ReadOptInt32();
            int cEnd   = reader.ReadOptInt32();

            return new CdnaCoordinateMap(gStart, gEnd, cStart, cEnd);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Reads a count followed by that many indices and resolves each index against
        /// the supplied cache.
        /// </summary>
        /// <typeparam name="T">Element type of the cache.</typeparam>
        /// <param name="reader">Reader positioned at the item count.</param>
        /// <param name="cachedItems">Cache that the stored indices refer to.</param>
        /// <returns>The cached items in the order their indices were stored.</returns>
        private static T[] ReadIndices<T>(ExtendedBinaryReader reader, T[] cachedItems)
        {
            int count    = reader.ReadOptInt32();
            var resolved = new T[count];

            int slot = 0;
            while (slot < count)
            {
                resolved[slot] = cachedItems[reader.ReadOptInt32()];
                slot++;
            }

            return resolved;
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Deserializes a supplementary interval: chromosome names and index, then
        /// start, end and the JSON string.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized interval.</param>
        /// <returns>The reconstructed supplementary interval.</returns>
        public static SuppInterval Read(ExtendedBinaryReader reader)
        {
            // the Ensembl name is stored before the UCSC name, but the Chromosome
            // constructor takes the UCSC name first
            string ensembl = reader.ReadAsciiString();
            string ucsc    = reader.ReadAsciiString();
            ushort index   = reader.ReadOptUInt16();
            var    chrom   = new Chromosome(ucsc, ensembl, null, null, 1, index);

            var begin  = reader.ReadOptInt32();
            var finish = reader.ReadOptInt32();
            var json   = reader.ReadString();

            return new SuppInterval(chrom, begin, finish, json);
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Loads a supplementary interval database from the stream. Up to
        /// <c>MaxStreamLength</c> bytes are read and decompressed, the header is validated,
        /// and the intervals are grouped by chromosome index into interval arrays.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of the compressed data.</param>
        /// <exception cref="UserErrorException">
        /// Thrown when the stored schema version differs from <c>SaCommon.SchemaVersion</c>.
        /// </exception>
        public NsiReader(Stream stream)
        {
            _stream = stream;

            // Stream.Read is allowed to return fewer bytes than requested even when more
            // data is available, so keep reading until the buffer is full or EOF is hit
            var compressData = new byte[MaxStreamLength];
            int length       = 0;
            while (length < MaxStreamLength)
            {
                int numRead = stream.Read(compressData, length, MaxStreamLength - length);
                if (numRead == 0)
                {
                    break;
                }

                length += numRead;
            }

            //uncompress
            var zstd = new Zstandard();
            var decompressedLength = zstd.GetDecompressedLength(compressData, length);
            var decompressedData   = new byte[decompressedLength];

            zstd.Decompress(compressData, length, decompressedData, decompressedLength);

            using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
                using (var memReader = new ExtendedBinaryReader(memStream))
                {
                    Version   = DataSourceVersion.Read(memReader);
                    Assembly  = (GenomeAssembly)memReader.ReadByte();
                    JsonKey   = memReader.ReadAsciiString();
                    ReportFor = (ReportFor)memReader.ReadByte();
                    int schemaVersion = memReader.ReadOptInt32();

                    if (schemaVersion != SaCommon.SchemaVersion)
                    {
                        throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
                    }

                    int count         = memReader.ReadOptInt32();
                    var suppIntervals = new Dictionary <ushort, List <Interval <string> > >();
                    for (var i = 0; i < count; i++)
                    {
                        var saInterval = new SuppInterval(memReader);

                        // fetch the chromosome's list, creating it on first use
                        if (!suppIntervals.TryGetValue(saInterval.Chromosome.Index, out var intervals))
                        {
                            intervals = new List <Interval <string> >();
                            suppIntervals[saInterval.Chromosome.Index] = intervals;
                        }

                        intervals.Add(new Interval <string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
                    }

                    _intervalArrays = new Dictionary <ushort, IntervalArray <string> >(suppIntervals.Count);
                    foreach ((ushort chromIndex, List <Interval <string> > intervals) in suppIntervals)
                    {
                        _intervalArrays[chromIndex] = new IntervalArray <string>(intervals.ToArray());
                    }
                }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Deserializes a gene: reference index, start, end, strand, symbol, HGNC ID,
        /// Entrez and Ensembl IDs, and the MIM number, read in that order.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized gene.</param>
        /// <returns>The reconstructed gene.</returns>
        public static Gene Read(ExtendedBinaryReader reader)
        {
            // location
            ushort refIndex  = reader.ReadUInt16();
            int    geneStart = reader.ReadOptInt32();
            int    geneEnd   = reader.ReadOptInt32();
            bool   isReverse = reader.ReadBoolean();

            // identifiers
            string geneSymbol = reader.ReadAsciiString();
            int    hgnc       = reader.ReadOptInt32();
            var    entrez     = CompactId.Read(reader);
            var    ensembl    = CompactId.Read(reader);
            int    mim        = reader.ReadOptInt32();

            return new Gene(refIndex, geneStart, geneEnd, isReverse, geneSymbol, hgnc, entrez, ensembl, mim);
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Reads the next custom interval from the underlying reader. An empty interval
        /// marks the end of the data; once it is seen, this and all later calls return null.
        /// </summary>
        /// <returns>The next interval with its optional value dictionaries, or null at the end.</returns>
        public DataStructures.CustomInterval GetNextCustomInterval()
        {
            if (_reachedEnd)
            {
                return null;
            }

            var begin  = _reader.ReadOptInt32();
            var finish = _reader.ReadOptInt32();

            var interval = new DataStructures.CustomInterval(_referenceName, begin, finish, _intervalType, null, null);

            // an empty interval is the end-of-data sentinel
            if (interval.IsEmpty())
            {
                _reachedEnd = true;
                return null;
            }

            // string-valued key/value pairs; the dictionary is only created when there are any
            var numStringPairs = _reader.ReadOptInt32();
            if (numStringPairs > 0)
            {
                interval.StringValues = new Dictionary<string, string>(numStringPairs);

                for (var remaining = numStringPairs; remaining > 0; remaining--)
                {
                    var name  = _reader.ReadUtf8String();
                    var value = _reader.ReadUtf8String();
                    interval.StringValues.Add(name, value);
                }
            }

            // non-string key/value pairs follow the same layout
            var numNonStringPairs = _reader.ReadOptInt32();
            if (numNonStringPairs > 0)
            {
                interval.NonStringValues = new Dictionary<string, string>(numNonStringPairs);

                for (var remaining = numNonStringPairs; remaining > 0; remaining--)
                {
                    var name  = _reader.ReadUtf8String();
                    var value = _reader.ReadUtf8String();
                    interval.NonStringValues.Add(name, value);
                }
            }

            return interval;
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Reads a count followed by that many (begin, end) pairs and wraps them in an
        /// interval array. Each interval carries a <c>MaskedEntry</c> with the same coordinates.
        /// </summary>
        /// <param name="reader">Reader positioned at the entry count.</param>
        /// <returns>An interval array over all masked entries, in stored order.</returns>
        private static IntervalArray<MaskedEntry> GetMaskedEntries(ExtendedBinaryReader reader)
        {
            int count     = reader.ReadOptInt32();
            var intervals = new Interval<MaskedEntry>[count];

            int slot = 0;
            while (slot < count)
            {
                int maskBegin = reader.ReadOptInt32();
                int maskEnd   = reader.ReadOptInt32();

                var entry = new MaskedEntry(maskBegin, maskEnd);
                intervals[slot] = new Interval<MaskedEntry>(maskBegin, maskEnd, entry);
                slot++;
            }

            return new IntervalArray<MaskedEntry>(intervals);
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Deserializes an OMIM entry: gene symbol, description, MIM number and the
        /// list of phenotypes (stored as a count followed by the phenotypes).
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized entry.</param>
        /// <returns>The reconstructed OMIM entry.</returns>
        public static OmimEntry Read(ExtendedBinaryReader reader)
        {
            var symbol = reader.ReadAsciiString();
            var text   = reader.ReadAsciiString();
            var mim    = reader.ReadOptInt32();

            var numPhenotypes  = reader.ReadOptInt32();
            var phenotypeList  = new List<Phenotype>();

            while (phenotypeList.Count < numPhenotypes)
            {
                phenotypeList.Add(Phenotype.ReadPhenotype(reader));
            }

            return new OmimEntry(symbol, text, mim, phenotypeList);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Reads the transcript count and then each transcript, resolving its references
        /// against the supplied caches. <c>CheckGuard</c> is called after the last transcript.
        /// </summary>
        /// <param name="genes">Gene cache passed through to <c>Transcript.Read</c>.</param>
        /// <param name="introns">Intron cache passed through to <c>Transcript.Read</c>.</param>
        /// <param name="mirnas">miRNA cache passed through to <c>Transcript.Read</c>.</param>
        /// <param name="peptideSeqs">Peptide sequences passed through to <c>Transcript.Read</c>.</param>
        /// <returns>The transcripts in stored order.</returns>
        private Transcript[] ReadTranscripts(Gene[] genes, SimpleInterval[] introns, SimpleInterval[] mirnas,
                                             string[] peptideSeqs)
        {
            var count  = _reader.ReadOptInt32();
            var result = new Transcript[count];

            int slot = 0;
            while (slot < count)
            {
                result[slot] = Transcript.Read(_reader, genes, introns, mirnas, peptideSeqs);
                slot++;
            }

            CheckGuard();
            return result;
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Reads and validates the database header, then loads the data source versions
        /// into <c>DataSourceVersions</c>. <c>CheckGuard</c> is called at the end.
        /// </summary>
        /// <exception cref="FormatException">Thrown when the header string is not the expected one.</exception>
        /// <exception cref="UserErrorException">Thrown when the schema version does not match.</exception>
        private void ReadHeader()
        {
            var headerText = _reader.ReadString();
            if (headerText != SaDataBaseCommon.DataHeader)
            {
                throw new FormatException("Unrecognized header in this database");
            }

            // several header fields are read only to advance the reader
            // ReSharper disable UnusedVariable
            var dataVersion   = _reader.ReadUInt16();
            var schemaVersion = _reader.ReadUInt16();

            if (schemaVersion != SaDataBaseCommon.SchemaVersion)
            {
                throw new UserErrorException($"Gene database schema mismatch. Expected {SaDataBaseCommon.SchemaVersion}, observed {schemaVersion}");
            }

            var genomeAssembly = (GenomeAssembly)_reader.ReadByte();
            var creationTime   = _reader.ReadInt64();
            // ReSharper restore UnusedVariable

            var numVersions = _reader.ReadOptInt32();
            for (var remaining = numVersions; remaining > 0; remaining--)
            {
                DataSourceVersions.Add(DataSourceVersion.Read(_reader));
            }

            CheckGuard();
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Reads a count followed by that many cytogenetic bands, each stored as
        /// begin, end and an ASCII name.
        /// </summary>
        /// <param name="reader">Reader positioned at the band count.</param>
        /// <returns>The bands in stored order.</returns>
        private static Band[] GetCytogeneticBands(ExtendedBinaryReader reader)
        {
            int count  = reader.ReadOptInt32();
            var result = new Band[count];

            int slot = 0;
            while (slot < count)
            {
                int    bandBegin = reader.ReadOptInt32();
                int    bandEnd   = reader.ReadOptInt32();
                string bandName  = reader.ReadAsciiString();

                result[slot] = new Band(bandBegin, bandEnd, bandName);
                slot++;
            }

            return result;
        }
        /// <summary>
        /// Reads and validates the supplementary annotation header, then loads the data
        /// source versions that follow it.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the file.</param>
        /// <param name="intervalsPosition">Receives the stored intervals offset.</param>
        /// <param name="saPath">Path used in the error message; "(resource)" is shown when null.</param>
        /// <returns>The parsed header.</returns>
        /// <exception cref="UserErrorException">
        /// Thrown when the header ID or the schema version does not match the expected value.
        /// </exception>
        private static SupplementaryAnnotationHeader GetHeader(ExtendedBinaryReader reader, out long intervalsPosition, string saPath = null)
        {
            // the ID is stored as raw ASCII bytes with the same length as the expected header
            var headerBytes    = reader.ReadBytes(SupplementaryAnnotationCommon.DataHeader.Length);
            var header         = System.Text.Encoding.ASCII.GetString(headerBytes);
            var dataVersion    = reader.ReadUInt16();
            var schemaVersion  = reader.ReadUInt16();
            var genomeAssembly = (GenomeAssembly)reader.ReadByte();

            bool headerMatches = header == SupplementaryAnnotationCommon.DataHeader;
            bool schemaMatches = schemaVersion == SupplementaryAnnotationCommon.SchemaVersion;

            if (!headerMatches || !schemaMatches)
            {
                throw new UserErrorException($"The header check failed for the supplementary annotation file ({saPath ?? "(resource)"}): ID: exp: {SupplementaryAnnotationCommon.DataHeader} obs: {header}, schema version: exp:{SupplementaryAnnotationCommon.SchemaVersion} obs: {schemaVersion}");
            }

            var creationTicks = reader.ReadInt64();
            var referenceName = reader.ReadString();

            // four offsets follow; only the intervals offset is kept, the rest are skipped
            reader.ReadInt64(); // _dataSourceVersionsOffset
            reader.ReadInt64(); // _dataOffset
            intervalsPosition = reader.ReadInt64();
            reader.ReadInt64(); // _eofOffset

            // data source versions: a count followed by the entries
            var numVersions = reader.ReadOptInt32();
            var versions    = new List<DataSourceVersion>();

            while (versions.Count < numVersions)
            {
                versions.Add(DataSourceVersion.Read(reader));
            }

            return new SupplementaryAnnotationHeader(referenceName, creationTicks, dataVersion, versions,
                                                     genomeAssembly);
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Deserializes a compact ID: a one-byte ID type followed by an integer payload.
        /// </summary>
        /// <param name="reader">Reader positioned at the serialized ID.</param>
        /// <returns>The reconstructed compact ID.</returns>
        public static CompactId Read(ExtendedBinaryReader reader)
        {
            // constructor arguments are evaluated left to right: type byte first, then the payload
            return new CompactId((IdType)reader.ReadByte(), reader.ReadOptInt32());
        }