/// <summary>
/// Reads two consecutive <c>ReadOptInt32</c> values (start, then end) from the binary reader
/// and wraps them in a <see cref="SimpleInterval"/>.
/// </summary>
/// <param name="reader">Reader positioned at the serialized interval.</param>
/// <returns>A new interval built from the two values in the order they were read.</returns>
public static SimpleInterval Read(ExtendedBinaryReader reader)
{
    // Constructor arguments are evaluated left to right, so start is consumed before end.
    return new SimpleInterval(reader.ReadOptInt32(), reader.ReadOptInt32());
}
/// <summary>
/// Initializes the node from the binary reader: the start, the end (serialized as an
/// offset relative to the start) and the file location, in that order.
/// </summary>
/// <param name="reader">Reader positioned at the serialized node.</param>
public JasixNode(ExtendedBinaryReader reader)
{
    _start = reader.ReadOptInt32();

    // on disk the end is stored as an offset from the start to save space
    var endOffset = reader.ReadOptInt32();
    _end = _start + endOffset;

    FileLocation = reader.ReadOptInt64();
}
/// <summary>
/// Creates an <see cref="NgaReader"/> from a stream: the header is read first via
/// <c>ReadHeader</c>, then the remainder is read through a Zstandard-decompressing
/// block stream as a table of gene symbol -> list of strings.
/// </summary>
/// <param name="stream">Stream positioned at the start of the header.</param>
/// <returns>A reader populated with the header fields and the per-gene string lists.</returns>
public static NgaReader Read(Stream stream)
{
    (IDataSourceVersion version, string jsonKey, bool isArray) = ReadHeader(stream);

    Dictionary<string, List<string>> jsonStringsByGene;

    using (var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress))
    using (var reader = new ExtendedBinaryReader(blockStream))
    {
        int geneCount = reader.ReadOptInt32();
        jsonStringsByGene = new Dictionary<string, List<string>>(geneCount);

        for (var geneOrdinal = 0; geneOrdinal < geneCount; geneOrdinal++)
        {
            string symbol = reader.ReadAsciiString();
            int entryCount = reader.ReadOptInt32();
            var jsonStrings = new List<string>(entryCount);

            // each gene record is followed by entryCount strings
            for (int remaining = entryCount; remaining > 0; remaining--)
            {
                jsonStrings.Add(reader.ReadString());
            }

            // indexer assignment: a repeated symbol replaces the earlier list
            jsonStringsByGene[symbol] = jsonStrings;
        }
    }

    return new NgaReader(version, jsonKey, isArray, jsonStringsByGene);
}
/// <summary>
/// Initializes the index block from the binary reader. The fields are consumed in
/// serialized order: start, end, file position, length.
/// </summary>
/// <param name="reader">Reader positioned at the serialized block.</param>
public NsaIndexBlock(ExtendedBinaryReader reader)
{
    var start        = reader.ReadOptInt32();
    var end          = reader.ReadOptInt32();
    var filePosition = reader.ReadOptInt64();
    var length       = reader.ReadOptInt32();

    Start        = start;
    End          = end;
    FilePosition = filePosition;
    Length       = length;
}
/// <summary>
/// Reads one transcript from the binary reader. Genes, introns, miRNAs and peptide
/// sequences are not stored inline; the record holds indices into the supplied caches.
/// </summary>
/// <param name="reader">Reader positioned at the serialized transcript.</param>
/// <param name="cacheGenes">Gene cache indexed by the serialized gene index.</param>
/// <param name="cacheIntrons">Intron cache used to resolve intron indices.</param>
/// <param name="cacheMirnas">miRNA cache used to resolve miRNA indices.</param>
/// <param name="cachePeptideSeqs">Peptide sequence cache passed on to <c>Translation.Read</c>.</param>
/// <returns>The deserialized transcript.</returns>
public static Transcript Read(ExtendedBinaryReader reader, Gene[] cacheGenes, SimpleInterval[] cacheIntrons,
    SimpleInterval[] cacheMirnas, string[] cachePeptideSeqs)
{
    // transcript
    var refIndex     = reader.ReadUInt16();
    var begin        = reader.ReadOptInt32();
    var finish       = reader.ReadOptInt32();
    var transcriptId = CompactId.Read(reader);

    // gene (stored as an index into the gene cache)
    var gene = cacheGenes[reader.ReadOptInt32()];

    // encoded data: its Has* flags decide which optional sections follow
    var encoded = new EncodedTranscriptData(reader.ReadUInt16(), reader.ReadByte());

    // exons & introns
    SimpleInterval[] introns = null;
    if (encoded.HasIntrons)
    {
        introns = ReadIndices(reader, cacheIntrons);
    }

    var cdnaMaps = encoded.HasCdnaMaps ? ReadCdnaMaps(reader) : null;

    // protein function predictions (-1 when absent)
    int siftIndex = -1;
    if (encoded.HasSift)
    {
        siftIndex = reader.ReadOptInt32();
    }

    int polyphenIndex = -1;
    if (encoded.HasPolyPhen)
    {
        polyphenIndex = reader.ReadOptInt32();
    }

    // translation
    var translation = encoded.HasTranslation ? Translation.Read(reader, cachePeptideSeqs) : null;

    // attributes
    SimpleInterval[] mirnas = null;
    if (encoded.HasMirnas)
    {
        mirnas = ReadIndices(reader, cacheMirnas);
    }

    return new Transcript(refIndex, begin, finish, transcriptId, encoded.Version, translation,
        encoded.BioType, gene, TranscriptUtilities.GetTotalExonLength(cdnaMaps), encoded.StartExonPhase,
        encoded.IsCanonical, introns, mirnas, cdnaMaps, siftIndex, polyphenIndex, encoded.TranscriptSource);
}
/// <summary>
/// Reads the sequence offset followed by the number of bases from the reader.
/// </summary>
/// <param name="reader">Reader positioned at the metadata.</param>
/// <returns>A tuple of (sequence offset, number of bases) in the order they were read.</returns>
private static (int SequenceOffset, int NumBases) GetMetadata(ExtendedBinaryReader reader)
{
    // Tuple elements are evaluated left to right: offset first, then base count.
    return (reader.ReadOptInt32(), reader.ReadOptInt32());
}
/// <summary>
/// Loads the index from a stream. The entire stream is first copied into memory,
/// then the header fields and the per-chromosome block lists are deserialized.
/// </summary>
/// <param name="stream">Stream containing the serialized index.</param>
public NsaIndex(Stream stream)
{
    using (var memStream = new MemoryStream())
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        // pull every remaining byte of the source stream into memory, then rewind
        stream.CopyTo(memStream);
        memStream.Position = 0;

        // header
        Assembly      = (GenomeAssembly)memReader.ReadByte();
        Version       = DataSourceVersion.Read(memReader);
        JsonKey       = memReader.ReadAsciiString();
        MatchByAllele = memReader.ReadBoolean();
        IsArray       = memReader.ReadBoolean();
        SchemaVersion = memReader.ReadOptInt32();
        IsPositional  = memReader.ReadBoolean();

        // per-chromosome blocks
        var chromCount = memReader.ReadOptInt32();
        _chromBlocks = new Dictionary<ushort, List<NsaIndexBlock>>(chromCount);

        for (var chromOrdinal = 0; chromOrdinal < chromCount; chromOrdinal++)
        {
            var chromIndex = memReader.ReadOptUInt16();
            var blockCount = memReader.ReadOptInt32();

            // register the list before filling it; a repeated chromosome index replaces the earlier list
            var blocks = new List<NsaIndexBlock>(blockCount);
            _chromBlocks[chromIndex] = blocks;

            for (int remaining = blockCount; remaining > 0; remaining--)
            {
                blocks.Add(new NsaIndexBlock(memReader));
            }
        }
    }
}
/// <summary>
/// Initializes the chunk from the binary reader. The fields are consumed in
/// serialized order: start, end, file position, length.
/// </summary>
/// <param name="reader">Reader positioned at the serialized chunk.</param>
public Chunk(ExtendedBinaryReader reader)
{
    var start        = reader.ReadOptInt32();
    var end          = reader.ReadOptInt32();
    var filePosition = reader.ReadOptInt64();
    var length       = reader.ReadOptInt32();

    _start       = start;
    _end         = end;
    FilePosition = filePosition;
    Length       = length;
}
/// <summary>
/// Loads the chunked index from a stream. Up to 1 MiB is buffered from the stream,
/// then the header fields and the per-chromosome chunk lists are deserialized from
/// that buffer.
/// </summary>
/// <param name="stream">Stream positioned at the start of the serialized index.</param>
public ChunkedIndex(Stream stream)
{
    // upper bound on how many index bytes are buffered (same limit as before: 1 MiB)
    const int maxIndexLength = 1048576;
    var buffer = new byte[maxIndexLength];

    // Stream.Read may legitimately return fewer bytes than requested before the end
    // of the stream is reached, so keep reading until the buffer is full or the
    // stream reports end-of-stream (0 bytes). A single call could truncate the index.
    var indexLength = 0;
    while (indexLength < maxIndexLength)
    {
        int bytesRead = stream.Read(buffer, indexLength, maxIndexLength - indexLength);
        if (bytesRead == 0)
        {
            break;
        }

        indexLength += bytesRead;
    }

    using (var memStream = new MemoryStream(buffer, 0, indexLength))
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        // header
        Assembly      = (GenomeAssembly)memReader.ReadByte();
        Version       = DataSourceVersion.Read(memReader);
        JsonKey       = memReader.ReadAsciiString();
        MatchByAllele = memReader.ReadBoolean();
        IsArray       = memReader.ReadBoolean();
        SchemaVersion = memReader.ReadOptInt32();
        IsPositional  = memReader.ReadBoolean();

        // per-chromosome chunks
        var chromCount = memReader.ReadOptInt32();
        _chromChunks = new Dictionary<ushort, List<Chunk>>(chromCount);

        for (var i = 0; i < chromCount; i++)
        {
            var chromIndex = memReader.ReadOptUInt16();
            var chunkCount = memReader.ReadOptInt32();

            // keep a local reference to avoid a dictionary lookup per chunk
            var chunks = new List<Chunk>(chunkCount);
            _chromChunks[chromIndex] = chunks;

            for (var j = 0; j < chunkCount; j++)
            {
                chunks.Add(new Chunk(memReader));
            }
        }
    }
}
/// <summary>
/// Reads a begin, an end and a 64-bit value from the reader, in that order, and returns
/// them as an <c>Interval&lt;long&gt;</c>.
/// </summary>
/// <param name="reader">Reader positioned at the serialized interval.</param>
/// <returns>The interval carrying the 64-bit value as its payload.</returns>
private static Interval<long> ReadInterval(ExtendedBinaryReader reader)
{
    // Arguments are evaluated left to right, matching the serialized order.
    return new Interval<long>(reader.ReadOptInt32(), reader.ReadOptInt32(), reader.ReadOptInt64());
}
/// <summary>
/// Reads a block header (compressed length, first position, count) and then the
/// compressed payload into <c>_compressedBlock</c>. The payload is not decompressed.
/// </summary>
/// <param name="reader">Reader positioned at the start of the block.</param>
/// <exception cref="System.IO.EndOfStreamException">
/// The reader ran out of data before the full compressed payload was read.
/// </exception>
public void ReadCompressedBytes(ExtendedBinaryReader reader)
{
    _compressedLength = reader.ReadOptInt32();
    _firstPosition    = reader.ReadOptInt32();
    //_lastPosition = reader.ReadOptInt32();
    _count            = reader.ReadOptInt32();

    // Read(byte[], int, int) may return fewer bytes than requested, so loop until the
    // whole payload has arrived. The first call is always made so that the reader's own
    // argument validation (negative length, buffer too small) still applies as before.
    var totalRead = 0;
    do
    {
        int bytesRead = reader.Read(_compressedBlock, totalRead, _compressedLength - totalRead);
        if (bytesRead <= 0)
        {
            break;
        }

        totalRead += bytesRead;
    } while (totalRead < _compressedLength);

    if (totalRead < _compressedLength)
    {
        throw new System.IO.EndOfStreamException(
            "Unexpected end of stream while reading a compressed block: expected " +
            _compressedLength + " bytes but only " + totalRead + " were available.");
    }
}
/// <summary>
/// Reads an RNA edit from the reader: start, end, then the bases as an ASCII string.
/// </summary>
/// <param name="reader">Reader positioned at the serialized RNA edit.</param>
/// <returns>A new <see cref="RnaEdit"/> built from the values read.</returns>
public static IRnaEdit Read(ExtendedBinaryReader reader)
{
    var editStart = reader.ReadOptInt32();
    var editEnd   = reader.ReadOptInt32();
    var editBases = reader.ReadAsciiString();

    return new RnaEdit(editStart, editEnd, editBases);
}
/// <summary>
/// Reads a coding region from the reader. Five values are consumed in order:
/// genomic start, genomic end, cDNA start, cDNA end, length.
/// </summary>
/// <param name="reader">Reader positioned at the serialized coding region.</param>
/// <returns>A new <see cref="CodingRegion"/> built from the values read.</returns>
public static ICodingRegion Read(ExtendedBinaryReader reader)
{
    var gStart    = reader.ReadOptInt32();
    var gEnd      = reader.ReadOptInt32();
    var cStart    = reader.ReadOptInt32();
    var cEnd      = reader.ReadOptInt32();
    var cdsLength = reader.ReadOptInt32();

    return new CodingRegion(gStart, gEnd, cStart, cEnd, cdsLength);
}
/// <summary>
/// Reads a regulatory element from the binary reader. The serialized order is:
/// reference index, start, end, element type, identifier.
/// </summary>
/// <param name="reader">Reader positioned at the serialized regulatory element.</param>
/// <returns>The deserialized regulatory element.</returns>
public static RegulatoryElement Read(ExtendedBinaryReader reader)
{
    var refIndex = reader.ReadUInt16();
    var begin    = reader.ReadOptInt32();
    var finish   = reader.ReadOptInt32();

    // the type byte precedes the id on disk, although the constructor takes the id first
    var elementType = (RegulatoryElementType)reader.ReadByte();
    var elementId   = CompactId.Read(reader);

    return new RegulatoryElement(refIndex, begin, finish, elementId, elementType);
}
/// <summary>
/// Reads a block header (compressed length, first position, count) and the compressed
/// payload into <c>_compressedBlock</c>, then decompresses it into
/// <c>_uncompressedBlock</c> and records the resulting length.
/// </summary>
/// <param name="reader">Reader positioned at the start of the block.</param>
/// <exception cref="System.IO.EndOfStreamException">
/// The reader ran out of data before the full compressed payload was read.
/// </exception>
public void Read(ExtendedBinaryReader reader)
{
    _compressedLength = reader.ReadOptInt32();
    _firstPosition    = reader.ReadOptInt32();
    //_lastPosition = reader.ReadOptInt32();
    _count            = reader.ReadOptInt32();

    // Read(byte[], int, int) may return fewer bytes than requested, so loop until the
    // whole payload has arrived; otherwise a partially filled buffer would be decompressed.
    // The first call is always made so that the reader's own argument validation
    // (negative length, buffer too small) still applies as before.
    var totalRead = 0;
    do
    {
        int bytesRead = reader.Read(_compressedBlock, totalRead, _compressedLength - totalRead);
        if (bytesRead <= 0)
        {
            break;
        }

        totalRead += bytesRead;
    } while (totalRead < _compressedLength);

    if (totalRead < _compressedLength)
    {
        throw new System.IO.EndOfStreamException(
            "Unexpected end of stream while reading a compressed block: expected " +
            _compressedLength + " bytes but only " + totalRead + " were available.");
    }

    _uncompressedLength = _compressionAlgorithm.Decompress(_compressedBlock, _compressedLength,
        _uncompressedBlock, _uncompressedBlock.Length);
}
/// <summary>
/// Reads a transcript region from the reader. The serialized order is: region type (one byte),
/// id, genomic start, genomic end, cDNA start, cDNA end.
/// </summary>
/// <param name="reader">Reader positioned at the serialized transcript region.</param>
/// <returns>A new <see cref="TranscriptRegion"/> built from the values read.</returns>
public static ITranscriptRegion Read(ExtendedBinaryReader reader)
{
    var regionType = (TranscriptRegionType)reader.ReadByte();
    var regionId   = reader.ReadOptUInt16();

    var gStart = reader.ReadOptInt32();
    var gEnd   = reader.ReadOptInt32();
    var cStart = reader.ReadOptInt32();
    var cEnd   = reader.ReadOptInt32();

    return new TranscriptRegion(regionType, regionId, gStart, gEnd, cStart, cEnd);
}
/// <summary>
/// Initializes the interval from the binary reader. The serialized order is:
/// Ensembl name, UCSC name, chromosome index, start, end, JSON string.
/// </summary>
/// <param name="reader">Reader positioned at the serialized interval.</param>
public SuppInterval(ExtendedBinaryReader reader)
{
    // the Ensembl name comes first on disk, but the Chromosome constructor takes the UCSC name first
    var ensembl = reader.ReadAsciiString();
    var ucsc    = reader.ReadAsciiString();
    var index   = reader.ReadOptUInt16();
    Chromosome  = new Chromosome(ucsc, ensembl, index);

    Start       = reader.ReadOptInt32();
    End         = reader.ReadOptInt32();
    _jsonString = reader.ReadString();
}
/// <summary>
/// Reads a cDNA coordinate map from the binary reader: the genomic interval
/// (start, end) followed by the cDNA interval (start, end).
/// </summary>
/// <param name="reader">Reader positioned at the serialized coordinate map.</param>
/// <returns>The deserialized coordinate map.</returns>
public static CdnaCoordinateMap Read(ExtendedBinaryReader reader)
{
    // Arguments are evaluated left to right, matching the serialized order.
    return new CdnaCoordinateMap(
        reader.ReadOptInt32(),  // genomic start
        reader.ReadOptInt32(),  // genomic end
        reader.ReadOptInt32(),  // cDNA start
        reader.ReadOptInt32()); // cDNA end
}
/// <summary>
/// Reads a count followed by that many indices, and resolves each index against
/// <paramref name="cachedItems"/>.
/// </summary>
/// <typeparam name="T">Element type of the cache.</typeparam>
/// <param name="reader">Reader positioned at the serialized index list.</param>
/// <param name="cachedItems">Cache the serialized indices point into.</param>
/// <returns>A new array holding the referenced cache entries in serialized order.</returns>
private static T[] ReadIndices<T>(ExtendedBinaryReader reader, T[] cachedItems)
{
    int count    = reader.ReadOptInt32();
    var resolved = new T[count];

    var slot = 0;
    while (slot < count)
    {
        resolved[slot] = cachedItems[reader.ReadOptInt32()];
        slot++;
    }

    return resolved;
}
/// <summary>
/// Reads a supplementary interval from the reader. The serialized order is:
/// Ensembl name, UCSC name, chromosome index, start, end, JSON string.
/// </summary>
/// <param name="reader">Reader positioned at the serialized interval.</param>
/// <returns>A new <see cref="SuppInterval"/> built from the values read.</returns>
public static SuppInterval Read(ExtendedBinaryReader reader)
{
    var ensembl = reader.ReadAsciiString();
    var ucsc    = reader.ReadAsciiString();
    var index   = reader.ReadOptUInt16();

    // only the names and the index are serialized; the remaining constructor arguments are fixed
    var chrom = new Chromosome(ucsc, ensembl, null, null, 1, index);

    var begin  = reader.ReadOptInt32();
    var finish = reader.ReadOptInt32();
    var json   = reader.ReadString();

    return new SuppInterval(chrom, begin, finish, json);
}
/// <summary>
/// Loads supplementary intervals from a stream. Up to <c>MaxStreamLength</c> compressed
/// bytes are buffered, decompressed with Zstandard, and then deserialized into one
/// interval array per chromosome index.
/// </summary>
/// <param name="stream">Stream positioned at the start of the compressed data.</param>
/// <exception cref="UserErrorException">
/// The schema version in the data differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
public NsiReader(Stream stream)
{
    _stream = stream;

    // Stream.Read may return fewer bytes than requested before the end of the stream,
    // so keep reading until the buffer is full or the stream reports end-of-stream.
    // A single call could hand a truncated payload to the decompressor.
    var compressData = new byte[MaxStreamLength];
    var length = 0;
    while (length < MaxStreamLength)
    {
        int bytesRead = stream.Read(compressData, length, MaxStreamLength - length);
        if (bytesRead == 0)
        {
            break;
        }

        length += bytesRead;
    }

    // uncompress
    var zstd               = new Zstandard();
    var decompressedLength = zstd.GetDecompressedLength(compressData, length);
    var decompressedData   = new byte[decompressedLength];
    zstd.Decompress(compressData, length, decompressedData, decompressedLength);

    using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        // header
        Version   = DataSourceVersion.Read(memReader);
        Assembly  = (GenomeAssembly)memReader.ReadByte();
        JsonKey   = memReader.ReadAsciiString();
        ReportFor = (ReportFor)memReader.ReadByte();

        int schemaVersion = memReader.ReadOptInt32();
        if (schemaVersion != SaCommon.SchemaVersion)
        {
            throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
        }

        // group the intervals by chromosome index
        int count = memReader.ReadOptInt32();
        var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();

        for (var i = 0; i < count; i++)
        {
            var saInterval = new SuppInterval(memReader);
            var interval   = new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString());

            if (!suppIntervals.TryGetValue(saInterval.Chromosome.Index, out var chromIntervals))
            {
                chromIntervals = new List<Interval<string>>();
                suppIntervals[saInterval.Chromosome.Index] = chromIntervals;
            }

            chromIntervals.Add(interval);
        }

        // freeze each per-chromosome list into an interval array
        _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
        foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
        {
            _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
        }
    }
}
/// <summary>
/// Reads a gene from the binary reader. The serialized order is: reference index, start, end,
/// strand flag, symbol, HGNC id, Entrez id, Ensembl id, MIM number.
/// </summary>
/// <param name="reader">Reader positioned at the serialized gene.</param>
/// <returns>The deserialized gene.</returns>
public static Gene Read(ExtendedBinaryReader reader)
{
    // location
    var refIndex  = reader.ReadUInt16();
    var begin     = reader.ReadOptInt32();
    var finish    = reader.ReadOptInt32();
    var isReverse = reader.ReadBoolean();

    // identifiers
    var geneSymbol = reader.ReadAsciiString();
    var hgnc       = reader.ReadOptInt32();
    var entrez     = CompactId.Read(reader);
    var ensembl    = CompactId.Read(reader);
    var mim        = reader.ReadOptInt32();

    return new Gene(refIndex, begin, finish, isReverse, geneSymbol, hgnc, entrez, ensembl, mim);
}
/// <summary>
/// Reads the next custom interval from <c>_reader</c>.
/// </summary>
/// <returns>
/// The next interval, or <c>null</c> once the end has been reached. The end is detected
/// when the interval that was just read reports <c>IsEmpty()</c>; after that every call
/// returns <c>null</c> without touching the reader.
/// </returns>
public DataStructures.CustomInterval GetNextCustomInterval()
{
    if (_reachedEnd)
    {
        return null;
    }

    var refName      = _referenceName;
    var intervalType = _intervalType;

    var begin  = _reader.ReadOptInt32();
    var finish = _reader.ReadOptInt32();

    var result = new DataStructures.CustomInterval(refName, begin, finish, intervalType, null, null);

    // an empty interval marks the end of the data
    if (result.IsEmpty())
    {
        _reachedEnd = true;
        return null;
    }

    // string-valued key/value pairs; the dictionary is only created when there is at least one pair
    var stringPairCount = _reader.ReadOptInt32();
    if (stringPairCount > 0)
    {
        result.StringValues = new Dictionary<string, string>(stringPairCount);

        for (int remaining = stringPairCount; remaining > 0; remaining--)
        {
            var name  = _reader.ReadUtf8String();
            var value = _reader.ReadUtf8String();
            result.StringValues.Add(name, value);
        }
    }

    // non-string key/value pairs (also read as UTF-8 strings)
    var nonStringPairCount = _reader.ReadOptInt32();
    if (nonStringPairCount > 0)
    {
        result.NonStringValues = new Dictionary<string, string>(nonStringPairCount);

        for (int remaining = nonStringPairCount; remaining > 0; remaining--)
        {
            var name  = _reader.ReadUtf8String();
            var value = _reader.ReadUtf8String();
            result.NonStringValues.Add(name, value);
        }
    }

    return result;
}
/// <summary>
/// Reads a count followed by that many (begin, end) pairs and returns them as an
/// interval array whose payloads are <see cref="MaskedEntry"/> objects with the same coordinates.
/// </summary>
/// <param name="reader">Reader positioned at the serialized masked entries.</param>
/// <returns>An interval array containing one interval per masked entry.</returns>
private static IntervalArray<MaskedEntry> GetMaskedEntries(ExtendedBinaryReader reader)
{
    int entryCount = reader.ReadOptInt32();
    var intervals  = new Interval<MaskedEntry>[entryCount];

    var slot = 0;
    while (slot < entryCount)
    {
        int maskBegin = reader.ReadOptInt32();
        int maskEnd   = reader.ReadOptInt32();

        var entry = new MaskedEntry(maskBegin, maskEnd);
        intervals[slot] = new Interval<MaskedEntry>(maskBegin, maskEnd, entry);
        slot++;
    }

    return new IntervalArray<MaskedEntry>(intervals);
}
/// <summary>
/// Reads an OMIM entry from the reader: gene symbol, description, MIM number, and a
/// counted list of phenotypes.
/// </summary>
/// <param name="reader">Reader positioned at the serialized entry.</param>
/// <returns>A new <see cref="OmimEntry"/> built from the values read.</returns>
public static OmimEntry Read(ExtendedBinaryReader reader)
{
    var symbol = reader.ReadAsciiString();
    var text   = reader.ReadAsciiString();
    var mim    = reader.ReadOptInt32();

    var phenotypeList = new List<Phenotype>();

    // the phenotype count precedes the phenotypes themselves
    for (var remaining = reader.ReadOptInt32(); remaining > 0; remaining--)
    {
        phenotypeList.Add(Phenotype.ReadPhenotype(reader));
    }

    return new OmimEntry(symbol, text, mim, phenotypeList);
}
/// <summary>
/// Reads a count followed by that many transcripts from <c>_reader</c>, then calls
/// <c>CheckGuard</c>.
/// </summary>
/// <param name="genes">Gene cache passed on to <c>Transcript.Read</c>.</param>
/// <param name="introns">Intron cache passed on to <c>Transcript.Read</c>.</param>
/// <param name="mirnas">miRNA cache passed on to <c>Transcript.Read</c>.</param>
/// <param name="peptideSeqs">Peptide sequence cache passed on to <c>Transcript.Read</c>.</param>
/// <returns>The transcripts in serialized order.</returns>
private Transcript[] ReadTranscripts(Gene[] genes, SimpleInterval[] introns, SimpleInterval[] mirnas,
    string[] peptideSeqs)
{
    var count  = _reader.ReadOptInt32();
    var result = new Transcript[count];

    var slot = 0;
    while (slot < count)
    {
        result[slot] = Transcript.Read(_reader, genes, introns, mirnas, peptideSeqs);
        slot++;
    }

    CheckGuard();
    return result;
}
/// <summary>
/// Reads and validates the database header from <c>_reader</c>, appends the data source
/// versions it contains to <c>DataSourceVersions</c>, then calls <c>CheckGuard</c>.
/// </summary>
/// <exception cref="FormatException">The header string is not <c>SaDataBaseCommon.DataHeader</c>.</exception>
/// <exception cref="UserErrorException">The schema differs from <c>SaDataBaseCommon.SchemaVersion</c>.</exception>
private void ReadHeader()
{
    var header = _reader.ReadString();
    if (header != SaDataBaseCommon.DataHeader)
    {
        throw new FormatException("Unrecognized header in this database");
    }

    // data version: consumed only to advance the reader, the value is not used
    _reader.ReadUInt16();

    var schema = _reader.ReadUInt16();
    if (schema != SaDataBaseCommon.SchemaVersion)
    {
        throw new UserErrorException($"Gene database schema mismatch. Expected {SaDataBaseCommon.SchemaVersion}, observed {schema}");
    }

    // genome assembly (one byte) and creation time (64-bit): skipped, the values are not used
    _reader.ReadByte();
    _reader.ReadInt64();

    var versionCount = _reader.ReadOptInt32();
    for (int remaining = versionCount; remaining > 0; remaining--)
    {
        DataSourceVersions.Add(DataSourceVersion.Read(_reader));
    }

    CheckGuard();
}
/// <summary>
/// Reads a count followed by that many bands, each serialized as begin, end and an ASCII name.
/// </summary>
/// <param name="reader">Reader positioned at the serialized bands.</param>
/// <returns>The bands in serialized order.</returns>
private static Band[] GetCytogeneticBands(ExtendedBinaryReader reader)
{
    int bandCount = reader.ReadOptInt32();
    var result    = new Band[bandCount];

    for (var slot = 0; slot < bandCount; slot++)
    {
        int bandBegin   = reader.ReadOptInt32();
        int bandEnd     = reader.ReadOptInt32();
        string bandName = reader.ReadAsciiString();

        result[slot] = new Band(bandBegin, bandEnd, bandName);
    }

    return result;
}
/// <summary>
/// Reads and validates the supplementary annotation header.
/// </summary>
/// <param name="reader">Reader positioned at the start of the file.</param>
/// <param name="intervalsPosition">Receives the third of the four 64-bit offsets stored in the header.</param>
/// <param name="saPath">Optional file path, used only in the error message.</param>
/// <returns>The header, including the data source versions it lists.</returns>
/// <exception cref="UserErrorException">
/// The header identifier or the schema version does not match the expected values.
/// </exception>
private static SupplementaryAnnotationHeader GetHeader(ExtendedBinaryReader reader, out long intervalsPosition, string saPath = null)
{
    // identifier, versions and assembly are all read before any validation happens
    var headerBytes = reader.ReadBytes(SupplementaryAnnotationCommon.DataHeader.Length);
    var header      = System.Text.Encoding.ASCII.GetString(headerBytes);

    var dataVersion    = reader.ReadUInt16();
    var schemaVersion  = reader.ReadUInt16();
    var genomeAssembly = (GenomeAssembly)reader.ReadByte();

    bool headerMatches = header == SupplementaryAnnotationCommon.DataHeader;
    bool schemaMatches = schemaVersion == SupplementaryAnnotationCommon.SchemaVersion;

    if (!(headerMatches && schemaMatches))
    {
        throw new UserErrorException($"The header check failed for the supplementary annotation file ({saPath ?? "(resource)"}): ID: exp: {SupplementaryAnnotationCommon.DataHeader} obs: {header}, schema version: exp:{SupplementaryAnnotationCommon.SchemaVersion} obs: {schemaVersion}");
    }

    var creationTimeTicks     = reader.ReadInt64();
    var referenceSequenceName = reader.ReadString();

    // four 64-bit offsets follow; only the intervals offset is kept
    reader.ReadInt64(); // _dataSourceVersionsOffset
    reader.ReadInt64(); // _dataOffset
    intervalsPosition = reader.ReadInt64();
    reader.ReadInt64(); // _eofOffset

    // data source versions
    var versions = new List<DataSourceVersion>();
    for (var remaining = reader.ReadOptInt32(); remaining > 0; remaining--)
    {
        versions.Add(DataSourceVersion.Read(reader));
    }

    return new SupplementaryAnnotationHeader(referenceSequenceName, creationTimeTicks, dataVersion, versions, genomeAssembly);
}
/// <summary>
/// Reads a compact id from the reader: one byte cast to <see cref="IdType"/>, followed by
/// an info value read with <c>ReadOptInt32</c>.
/// </summary>
/// <param name="reader">Reader positioned at the serialized id.</param>
/// <returns>A new <see cref="CompactId"/> built from the two values.</returns>
public static CompactId Read(ExtendedBinaryReader reader)
{
    // Arguments are evaluated left to right: the type byte is consumed before the info value.
    return new CompactId((IdType)reader.ReadByte(), reader.ReadOptInt32());
}