Пример #1
0
        /// <summary>
        /// Serializes the gene annotations into an in-memory buffer, compresses that buffer with
        /// Zstandard and writes the compressed bytes to <c>_nsaStream</c>.
        /// </summary>
        /// <param name="geneToEntries">
        /// Gene symbol mapped to the supplementary gene items recorded for that gene. Each item is
        /// stored as the string returned by its <c>GetJsonString()</c>.
        /// </param>
        /// <remarks>
        /// The uncompressed layout is: data source version, JSON key, is-array flag, schema version,
        /// gene count, and then for every gene its symbol, its entry count and the entries themselves.
        /// The <see cref="BinaryWriter"/> created over <c>_nsaStream</c> is disposed when this method
        /// returns; since it is created without <c>leaveOpen</c>, that also closes the stream.
        /// </remarks>
        public void Write(Dictionary <string, List <ISuppGeneItem> > geneToEntries)
        {
            using (var memStream = new MemoryStream())
            using (var memWriter = new ExtendedBinaryWriter(memStream))
            using (var writer = new BinaryWriter(_nsaStream))
            {
                // header fields
                _version.Write(memWriter);
                memWriter.WriteOptAscii(_jsonKey);
                memWriter.Write(_isArray);
                memWriter.WriteOpt(_schemaVersion);

                // payload: gene count, then (symbol, entry count, entries...) per gene
                memWriter.WriteOpt(geneToEntries.Count);
                foreach ((string geneSymbol, var entries) in geneToEntries)
                {
                    memWriter.WriteOptAscii(geneSymbol);
                    memWriter.WriteOpt(entries.Count);
                    foreach (ISuppGeneItem geneItem in entries)
                    {
                        memWriter.Write(geneItem.GetJsonString());
                    }
                }

                var uncompressedBytes = memStream.ToArray();

                // Size the destination for the worst case. Zstandard's documented bound
                // (ZSTD_compressBound) is srcSize + (srcSize >> 8) plus a margin of at most 64 bytes
                // for small inputs; a flat "+ 32" is below that bound and can be too small when the
                // input does not compress (e.g. tiny or very large, high-entropy payloads).
                int maxCompressedSize = uncompressedBytes.Length + (uncompressedBytes.Length >> 8) + 64;
                var compressedBytes   = new byte[maxCompressedSize];

                var compressor   = new Zstandard();
                var compressSize = compressor.Compress(uncompressedBytes, uncompressedBytes.Length, compressedBytes,
                                                       compressedBytes.Length);

                // only the first compressSize bytes of the buffer hold compressed data
                writer.Write(compressedBytes, 0, compressSize);
                Console.WriteLine($"Number of gene entries written:{geneToEntries.Count}");
            }
        }
Пример #2
0
        /// <summary>
        /// Writes the header through <c>_writer</c>. Field order: data header, schema version,
        /// current UTC time in ticks, data source version, guard int.
        /// </summary>
        private void WriteHeader()
        {
            // identification fields taken from OmimDatabaseCommon
            _writer.Write(OmimDatabaseCommon.DataHeader);
            _writer.Write(OmimDatabaseCommon.SchemaVersion);

            // time stamp of this write, expressed as UTC ticks
            long utcTicks = DateTime.UtcNow.Ticks;
            _writer.Write(utcTicks);

            // the data source version serializes itself
            _version.Write(_writer);

            // the guard int is the last field of the header
            _writer.Write(OmimDatabaseCommon.GuardInt);
        }
Пример #3
0
        /// <summary>
        /// Writes the header through <c>_writer</c>. Field order: data header, schema version,
        /// current UTC time in ticks, reference name, interval type, data source version, guard int.
        /// </summary>
        private void WriteHeader()
        {
            // identification fields taken from CustomIntervalCommon
            _writer.Write(CustomIntervalCommon.DataHeader);
            _writer.Write(CustomIntervalCommon.SchemaVersion);

            // time stamp of this write, expressed as UTC ticks
            long utcTicks = DateTime.UtcNow.Ticks;
            _writer.Write(utcTicks);

            // what this file describes
            _writer.Write(_referenceName);
            _writer.Write(_intervalType);

            // the data source version serializes itself
            _version.Write(_writer);

            // the guard int marks the end of the header
            _writer.Write(CustomIntervalCommon.GuardInt);
        }
Пример #4
0
 /// <summary>
 /// Writes the header to <c>_stream</c>. Field order: NSI identifier, data source version,
 /// genome assembly (one byte), JSON key, report-for value (one byte), schema version, guard int.
 /// </summary>
 /// <param name="version">Data source version; it serializes itself into the writer.</param>
 /// <param name="assembly">Genome assembly, stored as its byte value.</param>
 /// <param name="jsonKey">JSON key recorded in the header.</param>
 /// <param name="reportFor">Report-for value, stored as its byte value.</param>
 /// <param name="schemaVersion">Schema version recorded in the header.</param>
 private void WriteHeader(DataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion)
 {
     // NOTE(review): the third constructor argument (true) is presumably "leaveOpen", which would
     // keep _stream usable after this temporary writer is disposed - confirm against ExtendedBinaryWriter.
     using (var headerWriter = new ExtendedBinaryWriter(_stream, Encoding.UTF8, true))
     {
         byte assemblyCode  = (byte)assembly;
         byte reportForCode = (byte)reportFor;

         headerWriter.WriteOptAscii(SaCommon.NsiIdentifier);
         version.Write(headerWriter);
         headerWriter.Write(assemblyCode);
         headerWriter.WriteOptAscii(jsonKey);
         headerWriter.Write(reportForCode);
         headerWriter.Write(schemaVersion);

         // the guard int is the last field of the header
         headerWriter.Write(SaCommon.GuardInt);
     }
 }
Пример #5
0
        /// <summary>
        /// Creates the writer, sets up its in-memory buffer and immediately serializes the header
        /// fields into that buffer.
        /// </summary>
        /// <param name="writer">Destination writer; it is stored but not written to here.</param>
        /// <param name="version">Data source version; it serializes itself into the buffer.</param>
        /// <param name="assembly">Genome assembly, stored as its byte value.</param>
        /// <param name="jsonKey">JSON key recorded in the header.</param>
        /// <param name="reportFor">Report-for value, stored as its byte value.</param>
        /// <param name="schemaVersion">Schema version recorded in the header.</param>
        public NsiWriter(BinaryWriter writer, DataSourceVersion version,
                         GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion)
        {
            _writer = writer;

            // header fields go into a memory-backed writer rather than straight to the destination
            _memStream = new MemoryStream();
            _memWriter = new ExtendedBinaryWriter(_memStream);

            byte assemblyCode  = (byte)assembly;
            byte reportForCode = (byte)reportFor;

            version.Write(_memWriter);
            _memWriter.Write(assemblyCode);
            _memWriter.WriteOptAscii(jsonKey);
            _memWriter.Write(reportForCode);
            _memWriter.WriteOpt(schemaVersion);
        }
Пример #6
0
        /// <summary>
        /// Creates an index bound to <paramref name="indexWriter"/>, records the index settings on
        /// this instance and immediately writes them to the writer.
        /// </summary>
        /// <param name="indexWriter">Writer that receives the index header; also kept for later use.</param>
        /// <param name="assembly">Genome assembly, written as its byte value.</param>
        /// <param name="version">Data source version; it serializes itself into the writer.</param>
        /// <param name="jsonKey">JSON key recorded in the header.</param>
        /// <param name="matchByAllele">Match-by-allele flag.</param>
        /// <param name="isArray">Is-array flag.</param>
        /// <param name="schemaVersion">Schema version; written to the header but not stored on the instance.</param>
        /// <param name="isPositional">Is-positional flag.</param>
        public ChunkedIndex(ExtendedBinaryWriter indexWriter, GenomeAssembly assembly, DataSourceVersion version, string jsonKey, bool matchByAllele, bool isArray, int schemaVersion, bool isPositional)
        {
            // instance state
            _writer      = indexWriter;
            _chromChunks = new Dictionary <ushort, List <Chunk> >();

            Assembly      = assembly;
            Version       = version;
            JsonKey       = jsonKey;
            MatchByAllele = matchByAllele;
            IsArray       = isArray;
            IsPositional  = isPositional;

            // header, in on-disk order
            byte assemblyCode = (byte)assembly;
            indexWriter.Write(assemblyCode);
            version.Write(indexWriter);
            indexWriter.WriteOptAscii(jsonKey);
            indexWriter.Write(matchByAllele);
            indexWriter.Write(isArray);
            indexWriter.WriteOpt(schemaVersion);
            indexWriter.Write(isPositional);
        }
Пример #7
0
        /// <summary>
        /// Writes out the header of the Nirvana phyloP database. Field order: header, schema version,
        /// data version, genome assembly (one byte), data source version, reference sequence name,
        /// interval list position placeholder, guard int.
        /// </summary>
        /// <exception cref="MissingFieldException">
        /// Thrown when <c>_version</c> is null. The check happens before anything is written, so a
        /// missing version never leaves a partially written header in the stream.
        /// </exception>
        private void WriteHeader()
        {
            // validate first: failing after the first few fields would leave a truncated header behind
            if (_version == null)
            {
                throw new MissingFieldException("Phylop data version cannot be null");
            }

            _writer.Write(PhylopCommon.Header);
            _writer.Write(PhylopCommon.SchemaVersion);
            _writer.Write(PhylopCommon.DataVersion);

            _writer.Write((byte)_genomeAssembly);

            _version.Write(_writer);
            _writer.Write(_refSeqName);

            // place holder for the chromosome interval list position: remember where it sits in the stream
            _intervalListOffset = _writer.BaseStream.Position;
            _writer.Write(_intervalListPosition); // this is just a temp value. We will come back and write the real one before closing
            _writer.Write(CacheConstants.GuardInt);
        }
        /// <summary>
        /// Writes the protein conservation data through <c>_writer</c>: schema version, genome
        /// assembly and data source version, then one score record per matched transcript, the
        /// transcript groups, and finally an empty record that marks the end of the records.
        /// </summary>
        /// <param name="items">Conservation items to record. When null, nothing is written at all.</param>
        public void Write(IEnumerable <ProteinConservationItem> items)
        {
            if (items == null)
            {
                return;
            }

            // header
            _writer.WriteOpt(ProteinConservationCommon.SchemaVersion);
            _writer.Write((byte)_assembly);
            _version.Write(_writer);

            // protein sequence -> scores, as produced by GetProteinWithUniqueScores
            var scoresByProtein = GetProteinWithUniqueScores(items);
            var cachedProteins  = new HashSet <string>(_transcriptCacheData.PeptideSeqs);

            CheckProteinSetOverlap(scoresByProtein, cachedProteins);

            // transcript id (with version) -> scores
            var scoresByTranscript = new Dictionary <string, byte[]>();

            // protein sequence -> transcript ids; every aligned protein starts with an empty group
            var transcriptGroupsByProtein = new Dictionary <string, List <string> >(scoresByProtein.Count);
            foreach (var proteinSeq in scoresByProtein.Keys)
            {
                transcriptGroupsByProtein.Add(proteinSeq, new List <string>());
            }

            foreach (var intervalArray in _transcriptCacheData.TranscriptIntervalArrays)
            {
                // may happen since for GRCh38 decoy contigs, there may be none
                if (intervalArray == null)
                {
                    continue;
                }

                foreach (var interval in intervalArray.Array)
                {
                    var transcript  = interval.Value;
                    var translation = transcript.Translation;

                    // transcripts without a translation have no protein to look up
                    if (translation == null)
                    {
                        continue;
                    }

                    var peptideSeq = translation.PeptideSeq;
                    if (scoresByProtein.TryGetValue(peptideSeq, out var proteinScores))
                    {
                        var versionedId = transcript.Id.WithVersion;

                        // the first score array seen for a transcript id wins
                        scoresByTranscript.TryAdd(versionedId, proteinScores);

                        // NOTE(review): the id is appended to its group even when TryAdd found an
                        // existing entry, so a repeated id appears more than once - confirm intended.
                        transcriptGroupsByProtein[peptideSeq].Add(versionedId);
                    }
                }
            }

            foreach (var entry in scoresByTranscript)
            {
                var record = new TranscriptConservationScores(entry.Key, entry.Value);
                record.Write(_writer);
            }

            WriteTranscriptGroups(transcriptGroupsByProtein);

            Console.WriteLine($"Recorded conservation scores for {scoresByTranscript.Count} transcripts.");

            // an empty item indicates the end of the records
            TranscriptConservationScores.GetEmptyItem().Write(_writer);
        }