/// <summary>
/// Creates a reader over a score database stream and its companion index stream.
/// The index is parsed immediately; its assembly and version are exposed through
/// <c>Assembly</c> and <c>Version</c>.
/// </summary>
/// <param name="dbStream">Stream holding the score database.</param>
/// <param name="indexStream">Stream holding the index for <paramref name="dbStream"/>.</param>
/// <exception cref="UserErrorException">
/// The schema version stored in the index differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
public NpdReader(Stream dbStream, Stream indexStream)
{
    _dbStream    = dbStream;
    _indexStream = indexStream;
    _reader      = new ExtendedBinaryReader(dbStream);

    var indexReader = new ExtendedBinaryReader(indexStream);
    _index   = new NpdIndex(indexReader);
    Assembly = _index.Assembly;
    Version  = _index.Version;

    bool schemaMatches = _index.SchemaVersion == SaCommon.SchemaVersion;
    if (!schemaMatches)
    {
        throw new UserErrorException($"SA schema version mismatch. Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion}");
    }

    _zstd   = new Zstandard();
    _scores = new byte[NpdIndex.MaxChromLength];

    // The index stores score -> code; lookups while reading need code -> score.
    var codeToScore = new Dictionary<byte, double>();
    foreach ((double scoreValue, byte scoreCode) in _index.ScoreMap)
    {
        codeToScore.Add(scoreCode, scoreValue);
    }

    _scoreMap = codeToScore.ToImmutableDictionary();
}
/// <summary>
/// Serializes the header fields and all gene entries into memory, compresses the
/// result with Zstandard and writes the compressed bytes to <c>_nsaStream</c>.
/// </summary>
/// <param name="geneToEntries">Gene symbol mapped to the items written for that gene.</param>
/// <remarks>
/// The <see cref="BinaryWriter"/> wrapping <c>_nsaStream</c> is disposed at the end of
/// this method, which also closes <c>_nsaStream</c>.
/// </remarks>
public void Write(Dictionary<string, List<ISuppGeneItem>> geneToEntries)
{
    using (var memStream = new MemoryStream())
    using (var memWriter = new ExtendedBinaryWriter(memStream))
    using (var writer = new BinaryWriter(_nsaStream))
    {
        // header
        _version.Write(memWriter);
        memWriter.WriteOptAscii(_jsonKey);
        memWriter.Write(_isArray);
        memWriter.WriteOpt(_schemaVersion);

        // payload: gene count, then for each gene its symbol, entry count and JSON strings
        memWriter.WriteOpt(geneToEntries.Count);
        foreach ((string geneSymbol, var entries) in geneToEntries)
        {
            memWriter.WriteOptAscii(geneSymbol);
            memWriter.WriteOpt(entries.Count);
            foreach (ISuppGeneItem geneItem in entries)
            {
                memWriter.Write(geneItem.GetJsonString());
            }
        }

        var uncompressedBytes = memStream.ToArray();
        var compressor = new Zstandard();

        // Size the destination with the compressor's own bound instead of a fixed
        // "+ 32" margin: incompressible input can expand by more than 32 bytes,
        // which would make the destination buffer too small.
        var compressedBytes = new byte[compressor.GetCompressedBufferBounds(uncompressedBytes.Length)];
        var compressSize = compressor.Compress(uncompressedBytes, uncompressedBytes.Length, compressedBytes, compressedBytes.Length);

        writer.Write(compressedBytes, 0, compressSize);
        Console.WriteLine("Number of gene entries written:" + geneToEntries.Count);
    }
}
/// <summary>
/// Sorts the supplied intervals, serializes them through <c>_memWriter</c>, compresses
/// the full contents of <c>_memStream</c> with Zstandard and writes the compressed
/// bytes to <c>_writer</c>.
/// </summary>
/// <param name="siItems">Intervals to write; they are ordered by chromosome index, start and end before writing.</param>
/// <returns>The number of intervals written.</returns>
public int Write(IEnumerable<ISuppIntervalItem> siItems)
{
    var sortedItems = siItems
        .OrderBy(x => x.Chromosome.Index)
        .ThenBy(x => x.Start)
        .ThenBy(x => x.End)
        .ToList();

    Console.WriteLine($"Writing {sortedItems.Count} intervals to database...");

    _memWriter.WriteOpt(sortedItems.Count);
    foreach (ISuppIntervalItem item in sortedItems)
    {
        _memWriter.WriteOptAscii(item.Chromosome.EnsemblName);
        _memWriter.WriteOptAscii(item.Chromosome.UcscName);
        _memWriter.WriteOpt(item.Chromosome.Index);
        _memWriter.WriteOpt(item.Start);
        _memWriter.WriteOpt(item.End);
        _memWriter.Write(item.GetJsonString());
    }

    // NOTE(review): _memStream presumably already holds header data written elsewhere
    // in this class; everything in it is compressed as one frame - confirm.
    var uncompressedBytes = _memStream.ToArray();
    var compressor = new Zstandard();

    // Size the destination with the compressor's own bound instead of a fixed
    // "+ 32" margin: incompressible input can expand by more than 32 bytes,
    // which would make the destination buffer too small.
    var compressedBytes = new byte[compressor.GetCompressedBufferBounds(uncompressedBytes.Length)];
    var compressSize = compressor.Compress(uncompressedBytes, uncompressedBytes.Length, compressedBytes, compressedBytes.Length);

    _writer.Write(compressedBytes, 0, compressSize);
    _writer.Flush();

    return sortedItems.Count;
}
/// <summary>
/// Reads a Zstandard-compressed interval database from <paramref name="stream"/>,
/// decompresses it, parses the header and builds one interval array per chromosome index.
/// </summary>
/// <param name="stream">Stream positioned at the start of the compressed data. At most <c>MaxStreamLength</c> bytes are consumed.</param>
/// <exception cref="UserErrorException">
/// The schema version stored in the file differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
public NsiReader(Stream stream)
{
    _stream = stream;

    // Stream.Read may return fewer bytes than requested even before the end of the
    // stream, so keep reading until the stream is exhausted or the buffer is full.
    var compressData = new byte[MaxStreamLength];
    var length = 0;
    int bytesRead;
    while (length < MaxStreamLength &&
           (bytesRead = stream.Read(compressData, length, MaxStreamLength - length)) > 0)
    {
        length += bytesRead;
    }

    // uncompress
    var zstd = new Zstandard();
    var decompressedLength = zstd.GetDecompressedLength(compressData, length);
    var decompressedData = new byte[decompressedLength];
    zstd.Decompress(compressData, length, decompressedData, decompressedLength);

    using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        // header
        Version   = DataSourceVersion.Read(memReader);
        Assembly  = (GenomeAssembly)memReader.ReadByte();
        JsonKey   = memReader.ReadAsciiString();
        ReportFor = (ReportFor)memReader.ReadByte();

        int schemaVersion = memReader.ReadOptInt32();
        if (schemaVersion != SaCommon.SchemaVersion)
        {
            throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
        }

        // group the intervals by chromosome index
        int count = memReader.ReadOptInt32();
        var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();

        for (var i = 0; i < count; i++)
        {
            var saInterval = new SuppInterval(memReader);

            if (!suppIntervals.TryGetValue(saInterval.Chromosome.Index, out var chromIntervals))
            {
                chromIntervals = new List<Interval<string>>();
                suppIntervals[saInterval.Chromosome.Index] = chromIntervals;
            }

            chromIntervals.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
        }

        // freeze each per-chromosome list into an interval array
        _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
        foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
        {
            _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
        }
    }
}
/// <summary>
/// Creates a writer that emits score data to <paramref name="dbStream"/> and its
/// index to <paramref name="indexStream"/>, and allocates the working buffers.
/// </summary>
/// <param name="dbStream">Destination stream for the score database.</param>
/// <param name="indexStream">Destination stream handed to the index.</param>
/// <param name="version">Data source version passed to the index.</param>
/// <param name="assembly">Genome assembly passed to the index.</param>
/// <param name="jsonKey">JSON key passed to the index.</param>
/// <param name="schemaVersion">Schema version passed to the index.</param>
public NpdWriter(Stream dbStream, Stream indexStream, DataSourceVersion version, GenomeAssembly assembly,
    string jsonKey, int schemaVersion)
{
    // output targets
    _writer = new ExtendedBinaryWriter(dbStream);
    _index  = new NpdIndex(indexStream, assembly, version, jsonKey, schemaVersion);

    // score -> code lookup, pre-sized with byte.MaxValue entries
    _scoreMap = new Dictionary<double, byte>(byte.MaxValue);

    // fixed-size score buffer; the memory stream and its writer operate directly on it
    var scoreBuffer = new byte[NpdIndex.MaxChromLength];
    var scoreStream = new MemoryStream(scoreBuffer);
    _scores    = scoreBuffer;
    _memStream = scoreStream;
    _memWriter = new ExtendedBinaryWriter(scoreStream);

    // compression output buffer sized by the compressor's bound for the score buffer
    var compressor = new Zstandard();
    _zstd             = compressor;
    _compressedScores = new byte[compressor.GetCompressedBufferBounds(scoreBuffer.Length)];
}
/// <summary>
/// Reads a Zstandard-compressed gene annotation file from <paramref name="stream"/>,
/// decompresses it into memory and parses the header (version, JSON key, array flag
/// and schema version). The remaining data stays available through <c>_reader</c>.
/// </summary>
/// <param name="stream">Stream positioned at the start of the compressed data.</param>
/// <exception cref="UserErrorException">
/// The schema version stored in the file differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
public NgaReader(Stream stream)
{
    _nsaStream = stream;

    // read the whole file. Currently they are well under 2MB
    var compressedBytes   = new byte[2 * 1024 * 1024];
    var decompressedBytes = new byte[20 * 1024 * 1024];

    // Stream.Read may return fewer bytes than requested even before the end of the
    // stream, so keep reading until the stream is exhausted or the buffer is full.
    var compressedSize = 0;
    int bytesRead;
    while (compressedSize < compressedBytes.Length &&
           (bytesRead = _nsaStream.Read(compressedBytes, compressedSize, compressedBytes.Length - compressedSize)) > 0)
    {
        compressedSize += bytesRead;
    }

    var zstd = new Zstandard();
    var decompressedSize = zstd.Decompress(compressedBytes, compressedSize, decompressedBytes, decompressedBytes.Length);

    _memStream = new MemoryStream(decompressedBytes, 0, decompressedSize);
    _reader    = new ExtendedBinaryReader(_memStream);

    // header
    Version  = DataSourceVersion.Read(_reader);
    JsonKey  = _reader.ReadAsciiString();
    _isArray = _reader.ReadBoolean();

    ushort schemaVersion = _reader.ReadOptUInt16();
    if (schemaVersion != SaCommon.SchemaVersion)
    {
        throw new UserErrorException($"Expected schema version: {SaCommon.SchemaVersion}, observed: {schemaVersion} for {JsonKey}");
    }
}