Ejemplo n.º 1
0
        /// <summary>
        /// Reads the header from <paramref name="stream"/>: the identifier string, the data source
        /// version, the genome assembly byte, the JSON key, the report-for byte, the schema version
        /// and finally the guard integer.
        /// </summary>
        /// <param name="stream">Stream to read the header from.</param>
        /// <returns>The version, assembly, JSON key, report-for value and schema version, in that order.</returns>
        /// <exception cref="InvalidDataException">
        /// The identifier differs from <c>SaCommon.NsiIdentifier</c> or the guard value differs from
        /// <c>SaCommon.GuardInt</c>.
        /// </exception>
        private static (IDataSourceVersion, GenomeAssembly, string, ReportFor, int) ReadHeader(Stream stream)
        {
            IDataSourceVersion dataSourceVersion;
            GenomeAssembly     genomeAssembly;
            string             key;
            ReportFor          reportTarget;
            int                schema;

            using (var headerReader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))
            {
                // the file must start with the expected identifier
                string observedId = headerReader.ReadAsciiString();
                if (observedId != SaCommon.NsiIdentifier)
                    throw new InvalidDataException($"Failed to find identifier!!Expected: {SaCommon.NsiIdentifier}, observed:{observedId}");

                dataSourceVersion = DataSourceVersion.Read(headerReader);
                genomeAssembly    = (GenomeAssembly)headerReader.ReadByte();
                key               = headerReader.ReadAsciiString();
                reportTarget      = (ReportFor)headerReader.ReadByte();
                schema            = headerReader.ReadInt32();

                // the header is terminated by a guard integer
                uint observedGuard = headerReader.ReadUInt32();
                if (observedGuard != SaCommon.GuardInt)
                    throw new InvalidDataException($"Failed to find guard int!!Expected: {SaCommon.GuardInt}, observed:{observedGuard}");
            }

            return (dataSourceVersion, genomeAssembly, key, reportTarget, schema);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the index by loading up to 1 MiB (1048576 bytes) from <paramref name="stream"/>
        /// into memory and parsing the header fields followed by the per-chromosome chunk lists.
        /// </summary>
        /// <param name="stream">Stream containing the serialized index.</param>
        public ChunkedIndex(Stream stream)
        {
            // Load the index into memory. Stream.Read may return fewer bytes than requested even
            // when more data is available, so keep reading until the buffer is full or the stream
            // reports end-of-data (0 bytes read).
            var buffer      = new byte[1048576];
            var indexLength = 0;
            int bytesRead;
            while (indexLength < buffer.Length
                   && (bytesRead = stream.Read(buffer, indexLength, buffer.Length - indexLength)) > 0)
            {
                indexLength += bytesRead;
            }

            using (var memStream = new MemoryStream(buffer, 0, indexLength))
                using (var memReader = new ExtendedBinaryReader(memStream))
                {
                    // fixed header fields, in serialized order
                    Assembly      = (GenomeAssembly)memReader.ReadByte();
                    Version       = DataSourceVersion.Read(memReader);
                    JsonKey       = memReader.ReadAsciiString();
                    MatchByAllele = memReader.ReadBoolean();
                    IsArray       = memReader.ReadBoolean();
                    SchemaVersion = memReader.ReadOptInt32();
                    IsPositional  = memReader.ReadBoolean();

                    // one chunk list per chromosome index
                    var chromCount = memReader.ReadOptInt32();
                    _chromChunks = new Dictionary <ushort, List <Chunk> >(chromCount);
                    for (var i = 0; i < chromCount; i++)
                    {
                        var chromIndex = memReader.ReadOptUInt16();
                        var chunkCount = memReader.ReadOptInt32();

                        // keep a local reference so we do not hit the dictionary for every chunk
                        var chunks = new List <Chunk>(chunkCount);
                        _chromChunks[chromIndex] = chunks;
                        for (var j = 0; j < chunkCount; j++)
                        {
                            chunks.Add(new Chunk(memReader));
                        }
                    }
                }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the database header from <c>_reader</c>: header string, data version, schema
        /// version, genome assembly, creation time and the list of data source versions, then
        /// calls <c>CheckGuard</c>.
        /// </summary>
        /// <exception cref="FormatException">The header string differs from <c>SaDataBaseCommon.DataHeader</c>.</exception>
        /// <exception cref="UserErrorException">The schema version differs from <c>SaDataBaseCommon.SchemaVersion</c>.</exception>
        private void ReadHeader()
        {
            string observedHeader = _reader.ReadString();
            if (observedHeader != SaDataBaseCommon.DataHeader)
                throw new FormatException("Unrecognized header in this database");

            // data version: consumed to advance the reader, value not used
            _reader.ReadUInt16();

            ushort observedSchema = _reader.ReadUInt16();
            if (observedSchema != SaDataBaseCommon.SchemaVersion)
                throw new UserErrorException($"Gene database schema mismatch. Expected {SaDataBaseCommon.SchemaVersion}, observed {observedSchema}");

            // genome assembly and creation time: consumed to advance the reader, values not used
            _reader.ReadByte();
            _reader.ReadInt64();

            int remaining = _reader.ReadOptInt32();
            while (remaining-- > 0)
            {
                DataSourceVersions.Add(DataSourceVersion.Read(_reader));
            }

            CheckGuard();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the header from <paramref name="stream"/>: identifier, data source version,
        /// JSON key, array flag, schema version and guard integer.
        /// </summary>
        /// <param name="stream">Stream to read the header from.</param>
        /// <returns>The data source version, the JSON key and the array flag.</returns>
        /// <exception cref="InvalidDataException">
        /// The identifier differs from <c>SaCommon.NgaIdentifier</c> or the guard value differs from
        /// <c>SaCommon.GuardInt</c>.
        /// </exception>
        /// <exception cref="UserErrorException">The schema version differs from <c>SaCommon.SchemaVersion</c>.</exception>
        private static (IDataSourceVersion Version, string JsonKey, bool IsArray) ReadHeader(Stream stream)
        {
            using (var headerReader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))
            {
                var observedId = headerReader.ReadString();
                if (observedId != SaCommon.NgaIdentifier)
                    throw new InvalidDataException($"Expected the NGA identifier ({SaCommon.NgaIdentifier}), but found another value: ({observedId})");

                IDataSourceVersion dataSourceVersion = DataSourceVersion.Read(headerReader);
                var key          = headerReader.ReadString();
                var arrayFlag    = headerReader.ReadBoolean();
                var observedSchema = headerReader.ReadUInt16();

                if (observedSchema != SaCommon.SchemaVersion)
                    throw new UserErrorException($"Expected the schema version {SaCommon.SchemaVersion}, but found another value: ({observedSchema}) for {key}");

                // the header is terminated by a guard integer
                var observedGuard = headerReader.ReadUInt32();
                if (observedGuard != SaCommon.GuardInt)
                    throw new InvalidDataException($"Expected a guard integer ({SaCommon.GuardInt}), but found another value: ({observedGuard})");

                return (dataSourceVersion, key, arrayFlag);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the index by copying all of <paramref name="stream"/> into memory and parsing
        /// the header fields followed by the per-chromosome block lists.
        /// </summary>
        /// <param name="stream">Stream containing the serialized index.</param>
        public NsaIndex(Stream stream)
        {
            using (var inMemory = new MemoryStream())
                using (var indexReader = new ExtendedBinaryReader(inMemory))
                {
                    // pull the entire source stream into memory, then rewind for parsing
                    stream.CopyTo(inMemory);
                    inMemory.Position = 0;

                    // fixed header fields, in serialized order
                    Assembly      = (GenomeAssembly)indexReader.ReadByte();
                    Version       = DataSourceVersion.Read(indexReader);
                    JsonKey       = indexReader.ReadAsciiString();
                    MatchByAllele = indexReader.ReadBoolean();
                    IsArray       = indexReader.ReadBoolean();
                    SchemaVersion = indexReader.ReadOptInt32();
                    IsPositional  = indexReader.ReadBoolean();

                    int numChromosomes = indexReader.ReadOptInt32();
                    _chromBlocks = new Dictionary <ushort, List <NsaIndexBlock> >(numChromosomes);

                    for (int chrom = 0; chrom < numChromosomes; chrom++)
                    {
                        ushort refIndex  = indexReader.ReadOptUInt16();
                        int    numBlocks = indexReader.ReadOptInt32();

                        // register the list first, then fill it through the local reference
                        var blocks = new List <NsaIndexBlock>(numBlocks);
                        _chromBlocks[refIndex] = blocks;

                        for (int block = 0; block < numBlocks; block++)
                        {
                            blocks.Add(new NsaIndexBlock(indexReader));
                        }
                    }
                }
        }
        /// <summary>
        /// Wraps <paramref name="stream"/> in an <c>ExtendedBinaryReader</c> and reads the schema
        /// version, genome assembly and data source version from it.
        /// </summary>
        /// <param name="stream">Stream containing the protein conservation data.</param>
        /// <exception cref="InvalidDataException">
        /// The schema version read from the stream differs from
        /// <c>ProteinConservationCommon.SchemaVersion</c>.
        /// </exception>
        public ProteinConservationReader(Stream stream)
        {
            _reader = new ExtendedBinaryReader(stream);

            var schemaVersion = _reader.ReadOptInt32();

            if (schemaVersion != ProteinConservationCommon.SchemaVersion)
            {
                // a specific exception type instead of bare System.Exception; handlers that
                // catch Exception still catch this
                throw new InvalidDataException($"Schema version mismatch found. Observed: {schemaVersion}, expected: {ProteinConservationCommon.SchemaVersion}");
            }

            Assembly = (GenomeAssembly)_reader.ReadByte();
            Version  = DataSourceVersion.Read(_reader);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Reads up to <c>MaxStreamLength</c> compressed bytes from <paramref name="stream"/>,
        /// decompresses them with <c>Zstandard</c>, parses the header and then groups the
        /// supplementary intervals by chromosome index into interval arrays.
        /// </summary>
        /// <param name="stream">Stream containing the compressed interval data.</param>
        /// <exception cref="UserErrorException">The schema version differs from <c>SaCommon.SchemaVersion</c>.</exception>
        public NsiReader(Stream stream)
        {
            _stream = stream;

            // Stream.Read may return fewer bytes than requested even when more data is available,
            // so keep reading until the buffer is full or the stream reports end-of-data.
            var compressData = new byte[MaxStreamLength];
            var length       = 0;
            int bytesRead;
            while (length < compressData.Length
                   && (bytesRead = stream.Read(compressData, length, compressData.Length - length)) > 0)
            {
                length += bytesRead;
            }

            //uncompress
            var zstd = new Zstandard();
            var decompressedLength = zstd.GetDecompressedLength(compressData, length);
            var decompressedData   = new byte[decompressedLength];

            zstd.Decompress(compressData, length, decompressedData, decompressedLength);

            using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
                using (var memReader = new ExtendedBinaryReader(memStream))
                {
                    Version   = DataSourceVersion.Read(memReader);
                    Assembly  = (GenomeAssembly)memReader.ReadByte();
                    JsonKey   = memReader.ReadAsciiString();
                    ReportFor = (ReportFor)memReader.ReadByte();
                    int schemaVersion = memReader.ReadOptInt32();

                    if (schemaVersion != SaCommon.SchemaVersion)
                    {
                        throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
                    }

                    // bucket the intervals by chromosome index
                    int count         = memReader.ReadOptInt32();
                    var suppIntervals = new Dictionary <ushort, List <Interval <string> > >();
                    for (var i = 0; i < count; i++)
                    {
                        var saInterval = new SuppInterval(memReader);
                        ushort index   = saInterval.Chromosome.Index;

                        // create the bucket on first use, then add through a single code path
                        if (!suppIntervals.TryGetValue(index, out var bucket))
                        {
                            bucket = new List <Interval <string> >();
                            suppIntervals[index] = bucket;
                        }

                        bucket.Add(new Interval <string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
                    }

                    // freeze each bucket into an interval array
                    _intervalArrays = new Dictionary <ushort, IntervalArray <string> >(suppIntervals.Count);
                    foreach ((ushort chromIndex, List <Interval <string> > intervals) in suppIntervals)
                    {
                        _intervalArrays[chromIndex] = new IntervalArray <string>(intervals.ToArray());
                    }
                }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reads the file header from <c>_reader</c>: identifier, schema version, data version,
        /// genome assembly, data source version, reference name (skipped) and the interval list
        /// position. Afterwards it calls <c>CheckGuard</c> and <c>LoadChromosomeIntervals</c>
        /// and sets <c>IsInitialized</c>.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The identifier differs from <c>PhylopCommon.Header</c> or the schema version differs
        /// from <c>PhylopCommon.SchemaVersion</c>.
        /// </exception>
        private void LoadHeader()
        {
            var fileHeader = _reader.ReadString();
            if (fileHeader != PhylopCommon.Header)
                throw new InvalidDataException($"Unrecognized file header: {fileHeader}");

            var observedSchema = _reader.ReadInt16();
            if (observedSchema != PhylopCommon.SchemaVersion)
                throw new InvalidDataException($"Expected phylop schema version:{PhylopCommon.SchemaVersion} observed schema version: {observedSchema}");

            // a data version mismatch only produces a console warning
            var observedDataVersion = _reader.ReadInt16();
            if (observedDataVersion != PhylopCommon.DataVersion)
                Console.WriteLine($"WARNING: Expected phylop data version:{PhylopCommon.DataVersion} observed data version: {observedDataVersion}");

            _genomeAssembly = (GenomeAssembly)_reader.ReadByte();
            _version        = DataSourceVersion.Read(_reader);

            // the reference name is read only to advance past it
            _reader.ReadString();

            _intervalListPosition = _reader.ReadInt64();

            CheckGuard();
            LoadChromosomeIntervals();

            IsInitialized = true;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Reads up to 2 MiB of compressed data from <paramref name="stream"/>, decompresses it
        /// with <c>Zstandard</c> into a 20 MiB buffer and reads the header fields (data source
        /// version, JSON key, array flag, schema version) from the decompressed bytes.
        /// </summary>
        /// <param name="stream">Stream containing the compressed data.</param>
        /// <exception cref="UserErrorException">The schema version differs from <c>SaCommon.SchemaVersion</c>.</exception>
        public NgaReader(Stream stream)
        {
            _nsaStream = stream;
            // read the whole file. Currently they are well under 2MB
            var compressedBytes   = new byte[2 * 1024 * 1024];
            var decompressedBytes = new byte[20 * 1024 * 1024];

            // Stream.Read may return fewer bytes than requested even when more data is available,
            // so keep reading until the buffer is full or the stream reports end-of-data.
            var compressedSize = 0;
            int bytesRead;
            while (compressedSize < compressedBytes.Length
                   && (bytesRead = _nsaStream.Read(compressedBytes, compressedSize, compressedBytes.Length - compressedSize)) > 0)
            {
                compressedSize += bytesRead;
            }

            var zstd             = new Zstandard();
            var decompressedSize = zstd.Decompress(compressedBytes, compressedSize, decompressedBytes, decompressedBytes.Length);

            _memStream = new MemoryStream(decompressedBytes, 0, decompressedSize);
            _reader    = new ExtendedBinaryReader(_memStream);

            Version  = DataSourceVersion.Read(_reader);
            JsonKey  = _reader.ReadAsciiString();
            _isArray = _reader.ReadBoolean();
            ushort schemaVersion = _reader.ReadOptUInt16();

            if (schemaVersion != SaCommon.SchemaVersion)
            {
                throw new UserErrorException($"Expected schema version: {SaCommon.SchemaVersion}, observed: {schemaVersion} for {JsonKey}");
            }
        }