Пример #1
0
        /// <summary>
        /// Builds a reader from a database stream and its companion index stream.
        /// The index is loaded immediately, its schema version is validated, and the
        /// index's score map is inverted into a code-to-score lookup.
        /// </summary>
        /// <param name="dbStream">stream wrapped by the binary reader used for the database</param>
        /// <param name="indexStream">stream from which the <c>NpdIndex</c> is constructed</param>
        /// <exception cref="UserErrorException">
        /// thrown when the index schema version differs from <c>SaCommon.SchemaVersion</c>
        /// </exception>
        public NpdReader(Stream dbStream, Stream indexStream)
        {
            _dbStream = dbStream;
            _indexStream = indexStream;
            _reader = new ExtendedBinaryReader(dbStream);

            var indexReader = new ExtendedBinaryReader(indexStream);
            _index = new NpdIndex(indexReader);

            Assembly = _index.Assembly;
            Version = _index.Version;

            // refuse to continue with an index written for a different schema
            bool schemaMatches = _index.SchemaVersion == SaCommon.SchemaVersion;
            if (!schemaMatches)
            {
                throw new UserErrorException($"SA schema version mismatch. Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion}");
            }

            // the index yields (score, code) pairs; lookups here go from code to score.
            // Add() is used on purpose so that a duplicated code still raises an exception.
            var codeToScore = new Dictionary<byte, double>();
            foreach ((double scoreValue, byte scoreCode) in _index.ScoreMap)
            {
                codeToScore.Add(scoreCode, scoreValue);
            }

            _scoreMap = codeToScore.ToImmutableDictionary();

            _zstd = new Zstandard();
            _scores = new byte[NpdIndex.MaxChromLength];
        }
Пример #2
0
        /// <summary>
        /// Serializes the header fields (version, JSON key, array flag, schema version) followed by
        /// every gene symbol and the JSON string of each of its items into an in-memory buffer,
        /// compresses that buffer with Zstandard and writes the compressed bytes to <c>_nsaStream</c>.
        /// </summary>
        /// <param name="geneToEntries">items to write, grouped by gene symbol</param>
        /// <remarks>
        /// The <see cref="BinaryWriter"/> wrapping <c>_nsaStream</c> is disposed when this method returns;
        /// a <see cref="BinaryWriter"/> created without <c>leaveOpen</c> also closes the stream it wraps.
        /// </remarks>
        public void Write(Dictionary <string, List <ISuppGeneItem> > geneToEntries)
        {
            using (var memStream = new MemoryStream())
            using (var memWriter = new ExtendedBinaryWriter(memStream))
            using (var writer = new BinaryWriter(_nsaStream))
            {
                // header
                _version.Write(memWriter);
                memWriter.WriteOptAscii(_jsonKey);
                memWriter.Write(_isArray);
                memWriter.WriteOpt(_schemaVersion);

                // payload: gene count, then for each gene its symbol, item count and items
                memWriter.WriteOpt(geneToEntries.Count);
                foreach ((string geneSymbol, var entries) in geneToEntries)
                {
                    memWriter.WriteOptAscii(geneSymbol);
                    memWriter.WriteOpt(entries.Count);
                    foreach (ISuppGeneItem geneItem in entries)
                    {
                        memWriter.Write(geneItem.GetJsonString());
                    }
                }

                // make sure nothing is still pending in the writer before snapshotting the buffer
                memWriter.Flush();
                var uncompressedBytes = memStream.ToArray();

                // Size the destination with the compressor's own worst-case bound. A fixed
                // "+ 32" margin is too small when the payload is large and compresses poorly.
                var compressor      = new Zstandard();
                var compressedBytes = new byte[compressor.GetCompressedBufferBounds(uncompressedBytes.Length)];

                var compressSize = compressor.Compress(uncompressedBytes, uncompressedBytes.Length, compressedBytes,
                                                       compressedBytes.Length);

                writer.Write(compressedBytes, 0, compressSize);
                Console.WriteLine("Number of gene entries written:" + geneToEntries.Count);
            }
        }
Пример #3
0
        /// <summary>
        /// Sorts the supplied intervals by chromosome index, start and end, appends them to the
        /// in-memory writer, then compresses the full content of <c>_memStream</c> with Zstandard
        /// and writes the compressed bytes to <c>_writer</c>.
        /// </summary>
        /// <param name="siItems">supplementary intervals to write; enumerated once</param>
        /// <returns>the number of intervals written</returns>
        public int Write(IEnumerable <ISuppIntervalItem> siItems)
        {
            var sortedItems = siItems.OrderBy(x => x.Chromosome.Index).ThenBy(x => x.Start).ThenBy(x => x.End).ToList();

            Console.WriteLine($"Writing {sortedItems.Count} intervals to database...");
            _memWriter.WriteOpt(sortedItems.Count);
            foreach (ISuppIntervalItem item in sortedItems)
            {
                _memWriter.WriteOptAscii(item.Chromosome.EnsemblName);
                _memWriter.WriteOptAscii(item.Chromosome.UcscName);
                _memWriter.WriteOpt(item.Chromosome.Index);
                _memWriter.WriteOpt(item.Start);
                _memWriter.WriteOpt(item.End);
                _memWriter.Write(item.GetJsonString());
            }

            // make sure nothing is still pending in the writer before snapshotting the buffer
            _memWriter.Flush();
            var uncompressedBytes = _memStream.ToArray();

            // Size the destination with the compressor's own worst-case bound. A fixed
            // "+ 32" margin is too small when the payload is large and compresses poorly.
            var compressor      = new Zstandard();
            var compressedBytes = new byte[compressor.GetCompressedBufferBounds(uncompressedBytes.Length)];

            var compressSize = compressor.Compress(uncompressedBytes, uncompressedBytes.Length, compressedBytes,
                                                   compressedBytes.Length);

            _writer.Write(compressedBytes, 0, compressSize);
            _writer.Flush();
            return sortedItems.Count;
        }
Пример #4
0
        /// <summary>
        /// Reads a Zstandard-compressed interval database from <paramref name="stream"/>, decompresses it,
        /// parses the header (version, assembly, JSON key, report type, schema version) and builds one
        /// interval array per chromosome index from the stored intervals.
        /// </summary>
        /// <param name="stream">source stream; at most <c>MaxStreamLength</c> bytes are consumed from it</param>
        /// <exception cref="UserErrorException">
        /// thrown when the stored schema version differs from <c>SaCommon.SchemaVersion</c>
        /// </exception>
        public NsiReader(Stream stream)
        {
            _stream = stream;

            // Stream.Read may hand back fewer bytes than requested even when more data follows,
            // so keep reading until the stream is exhausted or the buffer is full.
            var compressData = new byte[MaxStreamLength];
            int length       = 0;
            while (length < MaxStreamLength)
            {
                int bytesRead = stream.Read(compressData, length, MaxStreamLength - length);
                if (bytesRead == 0)
                {
                    break;
                }

                length += bytesRead;
            }

            // uncompress
            var zstd = new Zstandard();
            var decompressedLength = zstd.GetDecompressedLength(compressData, length);
            var decompressedData   = new byte[decompressedLength];

            zstd.Decompress(compressData, length, decompressedData, decompressedLength);

            using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
            using (var memReader = new ExtendedBinaryReader(memStream))
            {
                Version   = DataSourceVersion.Read(memReader);
                Assembly  = (GenomeAssembly)memReader.ReadByte();
                JsonKey   = memReader.ReadAsciiString();
                ReportFor = (ReportFor)memReader.ReadByte();
                int schemaVersion = memReader.ReadOptInt32();

                if (schemaVersion != SaCommon.SchemaVersion)
                {
                    throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
                }

                // group the stored intervals by chromosome index
                int count         = memReader.ReadOptInt32();
                var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();
                for (var i = 0; i < count; i++)
                {
                    var saInterval = new SuppInterval(memReader);
                    var index      = saInterval.Chromosome.Index;

                    if (!suppIntervals.TryGetValue(index, out var chromIntervals))
                    {
                        chromIntervals       = new List<Interval<string>>();
                        suppIntervals[index] = chromIntervals;
                    }

                    chromIntervals.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
                }

                // freeze each per-chromosome list into an interval array
                _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
                foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
                {
                    _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Sets up a writer: wraps the database stream, creates the index on the index stream and
        /// allocates the score buffer, the memory stream/writer over that buffer, the compressor and
        /// the buffer that receives compressed scores.
        /// </summary>
        /// <param name="dbStream">stream wrapped by the binary writer used for the database</param>
        /// <param name="indexStream">stream handed to the <c>NpdIndex</c> constructor</param>
        /// <param name="version">data source version forwarded to the index</param>
        /// <param name="assembly">genome assembly forwarded to the index</param>
        /// <param name="jsonKey">JSON key forwarded to the index</param>
        /// <param name="schemaVersion">schema version forwarded to the index</param>
        public NpdWriter(Stream dbStream, Stream indexStream, DataSourceVersion version, GenomeAssembly assembly, string jsonKey, int schemaVersion)
        {
            _writer = new ExtendedBinaryWriter(dbStream);
            _index  = new NpdIndex(indexStream, assembly, version, jsonKey, schemaVersion);

            _zstd     = new Zstandard();
            _scoreMap = new Dictionary<double, byte>(byte.MaxValue);

            // the memory stream writes straight into the score buffer
            var scoreBuffer = new byte[NpdIndex.MaxChromLength];
            _scores    = scoreBuffer;
            _memStream = new MemoryStream(scoreBuffer);
            _memWriter = new ExtendedBinaryWriter(_memStream);

            // destination sized by the compressor's bound for a full score buffer
            var compressedCapacity = _zstd.GetCompressedBufferBounds(scoreBuffer.Length);
            _compressedScores = new byte[compressedCapacity];
        }
Пример #6
0
        /// <summary>
        /// Reads a Zstandard-compressed gene annotation file from <paramref name="stream"/>, decompresses it
        /// into memory and parses the header (version, JSON key, array flag, schema version).
        /// The binary reader is left positioned just after the header.
        /// </summary>
        /// <param name="stream">source stream; at most 2 MB of compressed data are consumed from it</param>
        /// <exception cref="UserErrorException">
        /// thrown when the stored schema version differs from <c>SaCommon.SchemaVersion</c>
        /// </exception>
        public NgaReader(Stream stream)
        {
            _nsaStream = stream;
            // read the whole file. Currently they are well under 2MB
            var compressedBytes   = new byte[2 * 1024 * 1024];
            var decompressedBytes = new byte[20 * 1024 * 1024];

            // Stream.Read may hand back fewer bytes than requested even when more data follows,
            // so keep reading until the stream is exhausted or the buffer is full.
            var compressedSize = 0;
            while (compressedSize < compressedBytes.Length)
            {
                int bytesRead = _nsaStream.Read(compressedBytes, compressedSize, compressedBytes.Length - compressedSize);
                if (bytesRead == 0)
                {
                    break;
                }

                compressedSize += bytesRead;
            }

            var zstd             = new Zstandard();
            var decompressedSize = zstd.Decompress(compressedBytes, compressedSize, decompressedBytes, decompressedBytes.Length);

            _memStream = new MemoryStream(decompressedBytes, 0, decompressedSize);
            _reader    = new ExtendedBinaryReader(_memStream);

            Version  = DataSourceVersion.Read(_reader);
            JsonKey  = _reader.ReadAsciiString();
            _isArray = _reader.ReadBoolean();
            ushort schemaVersion = _reader.ReadOptUInt16();

            if (schemaVersion != SaCommon.SchemaVersion)
            {
                throw new UserErrorException($"Expected schema version: {SaCommon.SchemaVersion}, observed: {schemaVersion} for {JsonKey}");
            }
        }