/// <summary>
/// Serializes the gene annotations into an in-memory buffer, compresses that buffer with
/// Zstandard and writes the compressed bytes to the NSA stream.
/// </summary>
/// <param name="geneToEntries">Gene symbol mapped to the supplementary gene items recorded for it.</param>
/// <remarks>
/// The <see cref="BinaryWriter"/> created over <c>_nsaStream</c> is disposed when this method
/// returns, which also closes <c>_nsaStream</c>.
/// </remarks>
public void Write(Dictionary<string, List<ISuppGeneItem>> geneToEntries)
{
    using (var memStream = new MemoryStream())
    using (var memWriter = new ExtendedBinaryWriter(memStream))
    using (var writer = new BinaryWriter(_nsaStream))
    {
        // header fields, followed by the number of genes
        _version.Write(memWriter);
        memWriter.WriteOptAscii(_jsonKey);
        memWriter.Write(_isArray);
        memWriter.WriteOpt(_schemaVersion);
        memWriter.WriteOpt(geneToEntries.Count);

        // per gene: symbol, number of entries, then one JSON string per entry
        foreach ((string geneSymbol, var entries) in geneToEntries)
        {
            memWriter.WriteOptAscii(geneSymbol);
            memWriter.WriteOpt(entries.Count);

            foreach (ISuppGeneItem geneItem in entries)
            {
                memWriter.Write(geneItem.GetJsonString());
            }
        }

        // make sure everything has reached the memory stream before taking the snapshot
        memWriter.Flush();
        var uncompressedBytes = memStream.ToArray();

        // Size the destination for the worst case. zstd only guarantees success when the
        // destination holds at least ZSTD_compressBound(n) bytes, which is
        // n + (n >> 8) + (up to 64 bytes for small inputs); a fixed "+ 32" margin is too
        // small for large, poorly compressible payloads.
        // NOTE(review): assumes Zstandard.Compress wraps ZSTD_compress - confirm.
        int maxCompressedSize = uncompressedBytes.Length + (uncompressedBytes.Length >> 8) + 64;
        var compressedBytes   = new byte[maxCompressedSize];

        var compressor   = new Zstandard();
        var compressSize = compressor.Compress(uncompressedBytes, uncompressedBytes.Length,
            compressedBytes, compressedBytes.Length);

        // only the first compressSize bytes of the buffer are valid output
        writer.Write(compressedBytes, 0, compressSize);

        Console.WriteLine($"Number of gene entries written:{geneToEntries.Count}");
    }
}
/// <summary>
/// Writes the OMIM database header to <c>_writer</c>: the data header, the schema version,
/// the current UTC time in ticks, the data source version and a closing guard integer.
/// </summary>
private void WriteHeader()
{
    // file identification
    _writer.Write(OmimDatabaseCommon.DataHeader);
    _writer.Write(OmimDatabaseCommon.SchemaVersion);

    // timestamp taken at the moment the header is written
    long utcTicks = DateTime.UtcNow.Ticks;
    _writer.Write(utcTicks);

    // data source version
    _version.Write(_writer);

    // the guard integer is the last header field
    _writer.Write(OmimDatabaseCommon.GuardInt);
}
/// <summary>
/// Writes the custom interval database header to <c>_writer</c>: the data header, the schema
/// version, the current UTC time in ticks, the reference name, the interval type, the data
/// source version and a closing guard integer.
/// </summary>
private void WriteHeader()
{
    // file identification
    _writer.Write(CustomIntervalCommon.DataHeader);
    _writer.Write(CustomIntervalCommon.SchemaVersion);

    // timestamp taken at the moment the header is written
    var nowTicks = DateTime.UtcNow.Ticks;
    _writer.Write(nowTicks);

    // what this file describes
    _writer.Write(_referenceName);
    _writer.Write(_intervalType);
    _version.Write(_writer);

    // the guard int marks the end of the header
    _writer.Write(CustomIntervalCommon.GuardInt);
}
/// <summary>
/// Writes the NSI header to <c>_stream</c>: the NSI identifier, the data source version, the
/// genome assembly, the JSON key, the report-for value, the schema version and a closing guard
/// integer.
/// </summary>
/// <param name="version">Data source version; it serializes itself to the writer.</param>
/// <param name="assembly">Genome assembly, stored as a single byte.</param>
/// <param name="jsonKey">JSON key, stored as an ASCII string.</param>
/// <param name="reportFor">Report-for value, stored as a single byte.</param>
/// <param name="schemaVersion">Schema version, stored with the plain integer <c>Write</c> overload.</param>
private void WriteHeader(DataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor,
    int schemaVersion)
{
    // the enums are persisted as one byte each
    var assemblyCode  = (byte)assembly;
    var reportForCode = (byte)reportFor;

    // same constructor arguments as before: (_stream, Encoding.UTF8, true)
    using (var headerWriter = new ExtendedBinaryWriter(_stream, Encoding.UTF8, true))
    {
        headerWriter.WriteOptAscii(SaCommon.NsiIdentifier);
        version.Write(headerWriter);
        headerWriter.Write(assemblyCode);
        headerWriter.WriteOptAscii(jsonKey);
        headerWriter.Write(reportForCode);
        headerWriter.Write(schemaVersion);
        headerWriter.Write(SaCommon.GuardInt);
    }
}
/// <summary>
/// Creates an NSI writer. The supplied writer is stored for later use, and the header fields
/// (data source version, assembly, JSON key, report-for value and schema version) are written
/// to a newly created in-memory buffer.
/// </summary>
/// <param name="writer">Writer stored in <c>_writer</c>; nothing is written to it here.</param>
/// <param name="version">Data source version; it serializes itself to the memory writer.</param>
/// <param name="assembly">Genome assembly, stored as a single byte.</param>
/// <param name="jsonKey">JSON key, stored as an ASCII string.</param>
/// <param name="reportFor">Report-for value, stored as a single byte.</param>
/// <param name="schemaVersion">Schema version, stored with <c>WriteOpt</c>.</param>
public NsiWriter(BinaryWriter writer, DataSourceVersion version, GenomeAssembly assembly, string jsonKey,
    ReportFor reportFor, int schemaVersion)
{
    // in-memory buffer and the writer that fills it
    _memStream = new MemoryStream();
    _memWriter = new ExtendedBinaryWriter(_memStream);
    _writer    = writer;

    // the enums are persisted as one byte each
    var assemblyCode  = (byte)assembly;
    var reportForCode = (byte)reportFor;

    version.Write(_memWriter);
    _memWriter.Write(assemblyCode);
    _memWriter.WriteOptAscii(jsonKey);
    _memWriter.Write(reportForCode);
    _memWriter.WriteOpt(schemaVersion);
}
/// <summary>
/// Creates a chunked index bound to <paramref name="indexWriter"/>. The index settings are
/// stored on the instance and immediately written to the index writer, and an empty
/// chromosome-to-chunks map is created.
/// </summary>
/// <param name="indexWriter">Writer that receives the index header; also kept in <c>_writer</c>.</param>
/// <param name="assembly">Genome assembly, written as a single byte.</param>
/// <param name="version">Data source version; it serializes itself to the writer.</param>
/// <param name="jsonKey">JSON key, written as an ASCII string.</param>
/// <param name="matchByAllele">Value stored in <c>MatchByAllele</c> and written to the header.</param>
/// <param name="isArray">Value stored in <c>IsArray</c> and written to the header.</param>
/// <param name="schemaVersion">Schema version; written to the header but not stored here.</param>
/// <param name="isPositional">Value stored in <c>IsPositional</c> and written to the header.</param>
public ChunkedIndex(ExtendedBinaryWriter indexWriter, GenomeAssembly assembly, DataSourceVersion version,
    string jsonKey, bool matchByAllele, bool isArray, int schemaVersion, bool isPositional)
{
    // remember the settings on the instance
    _writer       = indexWriter;
    Assembly      = assembly;
    Version       = version;
    JsonKey       = jsonKey;
    MatchByAllele = matchByAllele;
    IsArray       = isArray;
    IsPositional  = isPositional;

    // header fields; the order of these writes defines the on-disk layout
    _writer.Write((byte)assembly);
    version.Write(_writer);
    _writer.WriteOptAscii(jsonKey);
    _writer.Write(matchByAllele);
    _writer.Write(isArray);
    _writer.WriteOpt(schemaVersion);
    _writer.Write(isPositional);

    // chunk lists keyed by a ushort chromosome key, filled in later
    _chromChunks = new Dictionary<ushort, List<Chunk>>();
}
/// <summary>
/// Writes the header of the Nirvana PhyloP database: header string, schema version, data
/// version, genome assembly, data source version, reference sequence name, a placeholder for
/// the interval list position and a closing guard integer.
/// </summary>
/// <exception cref="MissingFieldException">
/// Thrown when <c>_version</c> is null. The check runs before anything is written, so a
/// failure does not leave a partial header in the output.
/// </exception>
private void WriteHeader()
{
    // validate first: a missing version must not leave a truncated header behind
    if (_version == null)
    {
        throw new MissingFieldException("Phylop data version cannot be null");
    }

    _writer.Write(PhylopCommon.Header);
    _writer.Write(PhylopCommon.SchemaVersion);
    _writer.Write(PhylopCommon.DataVersion);
    _writer.Write((byte)_genomeAssembly);
    _version.Write(_writer);
    _writer.Write(_refSeqName);

    // space holder for chromosome interval list position: remember where it sits in the stream
    _intervalListOffset = _writer.BaseStream.Position;

    // this is just a temp value. We will come back and write the real one before closing
    _writer.Write(_intervalListPosition);

    _writer.Write(CacheConstants.GuardInt);
}
/// <summary>
/// Writes the protein conservation data: a short header (schema version, assembly, data source
/// version), one score record per cached transcript whose peptide sequence has aligned scores,
/// the transcript groups per protein, and finally an empty record that marks the end of the
/// records.
/// </summary>
/// <param name="items">Conservation items to record; when null, nothing is written.</param>
public void Write(IEnumerable<ProteinConservationItem> items)
{
    if (items == null)
    {
        return;
    }

    // header
    _writer.WriteOpt(ProteinConservationCommon.SchemaVersion);
    _writer.Write((byte)_assembly);
    _version.Write(_writer);

    var scoresByProtein = GetProteinWithUniqueScores(items);
    var cachedProteins  = new HashSet<string>(_transcriptCacheData.PeptideSeqs);
    CheckProteinSetOverlap(scoresByProtein, cachedProteins);

    var scoresByTranscript = new Dictionary<string, byte[]>();

    // protein sequence -> transcript ids mapping, starting with an empty list per protein
    var transcriptIdsByProtein = new Dictionary<string, List<string>>(scoresByProtein.Count);
    foreach (var proteinSeq in scoresByProtein.Keys)
    {
        transcriptIdsByProtein.Add(proteinSeq, new List<string>());
    }

    foreach (var intervalArray in _transcriptCacheData.TranscriptIntervalArrays)
    {
        // may happen since for GRCh38 decoy contigs, there may be none
        if (intervalArray == null)
        {
            continue;
        }

        foreach (var interval in intervalArray.Array)
        {
            var transcript = interval.Value;
            if (transcript.Translation == null)
            {
                continue;
            }

            var peptideSeq = transcript.Translation.PeptideSeq;
            if (scoresByProtein.TryGetValue(peptideSeq, out var scores))
            {
                var versionedId = transcript.Id.WithVersion;

                // the first scores seen for a transcript id are kept; the id is always
                // appended to its protein's group
                scoresByTranscript.TryAdd(versionedId, scores);
                transcriptIdsByProtein[peptideSeq].Add(versionedId);
            }
        }
    }

    // one record per transcript
    foreach (var entry in scoresByTranscript)
    {
        var record = new TranscriptConservationScores(entry.Key, entry.Value);
        record.Write(_writer);
    }

    WriteTranscriptGroups(transcriptIdsByProtein);
    Console.WriteLine($"Recorded conservation scores for {scoresByTranscript.Count} transcripts.");

    // writing an empty item to indicate end of records
    var terminator = TranscriptConservationScores.GetEmptyItem();
    terminator.Write(_writer);
}