/// <summary>
/// Reads the header fields from <paramref name="stream"/> and validates the
/// leading identifier and the trailing guard value.
/// </summary>
/// <param name="stream">Stream positioned at the first byte of the header.</param>
/// <returns>
/// The data source version, genome assembly, JSON key, report-for value and schema
/// version, in the order they were read from the stream.
/// </returns>
/// <exception cref="InvalidDataException">
/// The identifier differs from <c>SaCommon.NsiIdentifier</c>, or the guard value
/// differs from <c>SaCommon.GuardInt</c>.
/// </exception>
private static (IDataSourceVersion, GenomeAssembly, string, ReportFor, int) ReadHeader(Stream stream)
{
    // NOTE(review): the third constructor argument is presumably "leaveOpen" - confirm against ExtendedBinaryReader.
    using (var reader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))
    {
        string identifier = reader.ReadAsciiString();
        if (identifier != SaCommon.NsiIdentifier)
        {
            throw new InvalidDataException($"Failed to find identifier!!Expected: {SaCommon.NsiIdentifier}, observed:{identifier}");
        }

        // The field order below is the on-disk layout; do not reorder these reads.
        var dataSourceVersion = DataSourceVersion.Read(reader);
        var genomeAssembly    = (GenomeAssembly)reader.ReadByte();
        string key            = reader.ReadAsciiString();
        var reportTarget      = (ReportFor)reader.ReadByte();
        int schema            = reader.ReadInt32();

        uint guard = reader.ReadUInt32();
        if (guard == SaCommon.GuardInt)
        {
            return (dataSourceVersion, genomeAssembly, key, reportTarget, schema);
        }

        throw new InvalidDataException($"Failed to find guard int!!Expected: {SaCommon.GuardInt}, observed:{guard}");
    }
}
/// <summary>
/// Builds the index from its serialized form in <paramref name="stream"/>.
/// </summary>
/// <param name="stream">
/// Stream positioned at the start of the serialized index. It is read through to
/// its end; this constructor does not close it.
/// </param>
/// <remarks>
/// Populates <c>Assembly</c>, <c>Version</c>, <c>JsonKey</c>, <c>MatchByAllele</c>,
/// <c>IsArray</c>, <c>SchemaVersion</c>, <c>IsPositional</c> and the per-chromosome
/// chunk lists, in that order.
/// </remarks>
public ChunkedIndex(Stream stream)
{
    // Read the index in one shot. A single Stream.Read call may return fewer bytes
    // than requested (and a fixed-size buffer caps the index size), so copy the
    // stream until it reports end-of-stream instead.
    using (var memStream = new MemoryStream())
    {
        stream.CopyTo(memStream);
        memStream.Position = 0;

        using (var memReader = new ExtendedBinaryReader(memStream))
        {
            Assembly      = (GenomeAssembly)memReader.ReadByte();
            Version       = DataSourceVersion.Read(memReader);
            JsonKey       = memReader.ReadAsciiString();
            MatchByAllele = memReader.ReadBoolean();
            IsArray       = memReader.ReadBoolean();
            SchemaVersion = memReader.ReadOptInt32();
            IsPositional  = memReader.ReadBoolean();

            int chromCount = memReader.ReadOptInt32();
            _chromChunks = new Dictionary<ushort, List<Chunk>>(chromCount);

            for (var i = 0; i < chromCount; i++)
            {
                ushort chromIndex = memReader.ReadOptUInt16();
                int chunkCount    = memReader.ReadOptInt32();

                // Keep a local reference so the dictionary is not re-indexed for every chunk.
                var chunks = new List<Chunk>(chunkCount);
                _chromChunks[chromIndex] = chunks;

                for (var j = 0; j < chunkCount; j++)
                {
                    chunks.Add(new Chunk(memReader));
                }
            }
        }
    }
}
/// <summary>
/// Reads the database header from <c>_reader</c>, validates the header string and
/// schema version, and appends every stored data source version to
/// <c>DataSourceVersions</c>. Finishes by calling <c>CheckGuard</c>.
/// </summary>
/// <exception cref="FormatException">
/// The header string differs from <c>SaDataBaseCommon.DataHeader</c>.
/// </exception>
/// <exception cref="UserErrorException">
/// The schema version differs from <c>SaDataBaseCommon.SchemaVersion</c>.
/// </exception>
private void ReadHeader()
{
    string fileHeader = _reader.ReadString();
    if (fileHeader != SaDataBaseCommon.DataHeader)
    {
        throw new FormatException("Unrecognized header in this database");
    }

    // The data version is read only to advance the reader; its value is not used.
    _reader.ReadUInt16();

    ushort schema = _reader.ReadUInt16();
    if (schema != SaDataBaseCommon.SchemaVersion)
    {
        throw new UserErrorException($"Gene database schema mismatch. Expected {SaDataBaseCommon.SchemaVersion}, observed {schema}");
    }

    // Genome assembly (one byte) and creation time (Int64) are likewise skipped.
    _reader.ReadByte();
    _reader.ReadInt64();

    int versionCount = _reader.ReadOptInt32();
    for (int remaining = versionCount; remaining > 0; remaining--)
    {
        var dataSourceVersion = DataSourceVersion.Read(_reader);
        DataSourceVersions.Add(dataSourceVersion);
    }

    CheckGuard();
}
/// <summary>
/// Reads the header from <paramref name="stream"/>, validating the identifier,
/// the schema version and the guard integer along the way.
/// </summary>
/// <param name="stream">Stream positioned at the first byte of the header.</param>
/// <returns>The data source version, the JSON key and the is-array flag read from the header.</returns>
/// <exception cref="InvalidDataException">
/// The identifier differs from <c>SaCommon.NgaIdentifier</c>, or the guard value
/// differs from <c>SaCommon.GuardInt</c>.
/// </exception>
/// <exception cref="UserErrorException">
/// The schema version differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
private static (IDataSourceVersion Version, string JsonKey, bool IsArray) ReadHeader(Stream stream)
{
    // NOTE(review): the third constructor argument is presumably "leaveOpen" - confirm against ExtendedBinaryReader.
    using (var reader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))
    {
        var identifier = reader.ReadString();
        if (identifier != SaCommon.NgaIdentifier)
        {
            throw new InvalidDataException($"Expected the NGA identifier ({SaCommon.NgaIdentifier}), but found another value: ({identifier})");
        }

        // Payload fields, in on-disk order.
        var version  = DataSourceVersion.Read(reader);
        var jsonKey  = reader.ReadString();
        var isArray  = reader.ReadBoolean();

        var schemaVersion = reader.ReadUInt16();
        if (schemaVersion != SaCommon.SchemaVersion)
        {
            throw new UserErrorException($"Expected the schema version {SaCommon.SchemaVersion}, but found another value: ({schemaVersion}) for {jsonKey}");
        }

        var guard = reader.ReadUInt32();
        if (guard != SaCommon.GuardInt)
        {
            throw new InvalidDataException($"Expected a guard integer ({SaCommon.GuardInt}), but found another value: ({guard})");
        }

        // Returning from inside the using block still disposes the reader on the way out.
        return (version, jsonKey, isArray);
    }
}
/// <summary>
/// Builds the index from its serialized form in <paramref name="stream"/>.
/// </summary>
/// <param name="stream">
/// Stream holding the serialized index. Everything from its current position to
/// its end is copied into memory before parsing.
/// </param>
/// <remarks>
/// Populates <c>Assembly</c>, <c>Version</c>, <c>JsonKey</c>, <c>MatchByAllele</c>,
/// <c>IsArray</c>, <c>SchemaVersion</c>, <c>IsPositional</c> and the per-chromosome
/// block lists, in that order.
/// </remarks>
public NsaIndex(Stream stream)
{
    using (var memStream = new MemoryStream())
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        // Pull every remaining byte of the source into memory, then rewind for parsing.
        stream.CopyTo(memStream);
        memStream.Position = 0;

        Assembly      = (GenomeAssembly)memReader.ReadByte();
        Version       = DataSourceVersion.Read(memReader);
        JsonKey       = memReader.ReadAsciiString();
        MatchByAllele = memReader.ReadBoolean();
        IsArray       = memReader.ReadBoolean();
        SchemaVersion = memReader.ReadOptInt32();
        IsPositional  = memReader.ReadBoolean();

        int chromosomeCount = memReader.ReadOptInt32();
        _chromBlocks = new Dictionary<ushort, List<NsaIndexBlock>>(chromosomeCount);

        for (int chrom = 0; chrom < chromosomeCount; chrom++)
        {
            ushort chromIndex = memReader.ReadOptUInt16();
            int blockCount    = memReader.ReadOptInt32();

            // Register the list first, then fill it through the local reference.
            var blocks = new List<NsaIndexBlock>(blockCount);
            _chromBlocks[chromIndex] = blocks;

            for (int block = 0; block < blockCount; block++)
            {
                blocks.Add(new NsaIndexBlock(memReader));
            }
        }
    }
}
/// <summary>
/// Wraps <paramref name="stream"/> in a reader, validates the schema version and
/// reads the genome assembly and data source version that follow it.
/// </summary>
/// <param name="stream">Stream positioned at the start of the protein conservation data.</param>
/// <exception cref="InvalidDataException">
/// The schema version in the stream differs from
/// <c>ProteinConservationCommon.SchemaVersion</c>.
/// </exception>
public ProteinConservationReader(Stream stream)
{
    _reader = new ExtendedBinaryReader(stream);

    var schemaVersion = _reader.ReadOptInt32();
    if (schemaVersion != ProteinConservationCommon.SchemaVersion)
    {
        // Throw a specific exception type rather than the bare System.Exception so callers
        // can tell bad input apart from unrelated failures. InvalidDataException derives
        // from Exception, so existing catch sites keep working.
        throw new InvalidDataException($"Schema version mismatch found. Observed: {schemaVersion}, expected: {ProteinConservationCommon.SchemaVersion}");
    }

    Assembly = (GenomeAssembly)_reader.ReadByte();
    Version  = DataSourceVersion.Read(_reader);
}
/// <summary>
/// Reads the Zstandard-compressed content of <paramref name="stream"/>, decompresses
/// it, parses the header fields and builds one interval array per chromosome index.
/// </summary>
/// <param name="stream">
/// Stream holding the compressed data. At most <c>MaxStreamLength</c> bytes are read
/// from it; the stream is stored in <c>_stream</c> and is not closed here.
/// </param>
/// <exception cref="UserErrorException">
/// The schema version in the data differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
public NsiReader(Stream stream)
{
    _stream = stream;

    // Stream.Read may return fewer bytes than requested even when more data follows,
    // so keep reading until the buffer is full or the stream reports end-of-stream.
    var compressData = new byte[MaxStreamLength];
    int length = 0;
    while (length < MaxStreamLength)
    {
        int bytesRead = stream.Read(compressData, length, MaxStreamLength - length);
        if (bytesRead == 0) break;
        length += bytesRead;
    }

    // uncompress
    var zstd = new Zstandard();
    var decompressedLength = zstd.GetDecompressedLength(compressData, length);
    var decompressedData = new byte[decompressedLength];
    zstd.Decompress(compressData, length, decompressedData, decompressedLength);

    using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        Version   = DataSourceVersion.Read(memReader);
        Assembly  = (GenomeAssembly)memReader.ReadByte();
        JsonKey   = memReader.ReadAsciiString();
        ReportFor = (ReportFor)memReader.ReadByte();

        int schemaVersion = memReader.ReadOptInt32();
        if (schemaVersion != SaCommon.SchemaVersion)
        {
            throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
        }

        // Group the intervals by chromosome index, preserving their order in the file.
        int count = memReader.ReadOptInt32();
        var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();

        for (var i = 0; i < count; i++)
        {
            var saInterval = new SuppInterval(memReader);
            ushort chromosomeIndex = saInterval.Chromosome.Index;

            if (!suppIntervals.TryGetValue(chromosomeIndex, out var chromIntervals))
            {
                chromIntervals = new List<Interval<string>>();
                suppIntervals[chromosomeIndex] = chromIntervals;
            }

            chromIntervals.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
        }

        _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
        foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
        {
            _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
        }
    }
}
/// <summary>
/// Reads and validates the file header from <c>_reader</c>, records the genome
/// assembly, data source version and interval list position, then checks the guard,
/// loads the chromosome intervals and marks the reader as initialized.
/// </summary>
/// <exception cref="InvalidDataException">
/// The header string differs from <c>PhylopCommon.Header</c>, or the schema version
/// differs from <c>PhylopCommon.SchemaVersion</c>.
/// </exception>
private void LoadHeader()
{
    string fileHeader = _reader.ReadString();
    if (fileHeader != PhylopCommon.Header)
    {
        throw new InvalidDataException("Unrecognized file header: " + fileHeader);
    }

    short observedSchema = _reader.ReadInt16();
    if (observedSchema != PhylopCommon.SchemaVersion)
    {
        string schemaMessage = "Expected phylop schema version:" + PhylopCommon.SchemaVersion + " observed schema version: " + observedSchema;
        throw new InvalidDataException(schemaMessage);
    }

    // A data version mismatch is not fatal: it is only reported on the console.
    short observedDataVersion = _reader.ReadInt16();
    if (observedDataVersion != PhylopCommon.DataVersion)
    {
        string warning = "WARNING: Expected phylop data version:" + PhylopCommon.DataVersion + " observed data version: " + observedDataVersion;
        Console.WriteLine(warning);
    }

    _genomeAssembly = (GenomeAssembly)_reader.ReadByte();
    _version        = DataSourceVersion.Read(_reader);

    // The reference name is stored next; read it only to advance past it.
    _reader.ReadString();

    _intervalListPosition = _reader.ReadInt64();

    CheckGuard();
    LoadChromosomeIntervals();

    IsInitialized = true;
}
/// <summary>
/// Reads the Zstandard-compressed content of <paramref name="stream"/>, decompresses
/// it into memory and parses the data source version, JSON key, is-array flag and
/// schema version from the start of the decompressed data.
/// </summary>
/// <param name="stream">
/// Stream holding the compressed data; it is stored in <c>_nsaStream</c> and is not
/// closed here.
/// </param>
/// <exception cref="UserErrorException">
/// The schema version in the data differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
public NgaReader(Stream stream)
{
    _nsaStream = stream;

    // read the whole file. Currently they are well under 2MB
    // NOTE(review): input beyond the 2 MiB buffer is not read, and the decompressed
    // output is limited to the fixed 20 MiB buffer below.
    var compressedBytes   = new byte[2 * 1024 * 1024];
    var decompressedBytes = new byte[20 * 1024 * 1024];

    // Stream.Read may return fewer bytes than requested even when more data follows,
    // so keep reading until the buffer is full or the stream reports end-of-stream.
    var compressedSize = 0;
    while (compressedSize < compressedBytes.Length)
    {
        int bytesRead = _nsaStream.Read(compressedBytes, compressedSize, compressedBytes.Length - compressedSize);
        if (bytesRead == 0) break;
        compressedSize += bytesRead;
    }

    var zstd = new Zstandard();
    var decompressedSize = zstd.Decompress(compressedBytes, compressedSize, decompressedBytes, decompressedBytes.Length);

    _memStream = new MemoryStream(decompressedBytes, 0, decompressedSize);
    _reader    = new ExtendedBinaryReader(_memStream);

    Version  = DataSourceVersion.Read(_reader);
    JsonKey  = _reader.ReadAsciiString();
    _isArray = _reader.ReadBoolean();

    ushort schemaVersion = _reader.ReadOptUInt16();
    if (schemaVersion != SaCommon.SchemaVersion)
    {
        throw new UserErrorException($"Expected schema version: {SaCommon.SchemaVersion}, observed: {schemaVersion} for {JsonKey}");
    }
}