Ejemplo n.º 1
0
        /// <summary>
        /// reads the header from the stream, checking the identifier at its start and the guard value at its end
        /// </summary>
        /// <param name="stream">stream to read the header from</param>
        /// <returns>the data source version, genome assembly, JSON key, report-for value and schema version, in that order</returns>
        /// <exception cref="InvalidDataException">the identifier or the guard value differs from the one declared in SaCommon</exception>
        private static (IDataSourceVersion, GenomeAssembly, string, ReportFor, int) ReadHeader(Stream stream)
        {
            using (var headerReader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))
            {
                // the identifier is the first field of the header
                string observedIdentifier = headerReader.ReadAsciiString();
                bool hasExpectedIdentifier = observedIdentifier == SaCommon.NsiIdentifier;
                if (!hasExpectedIdentifier)
                {
                    throw new InvalidDataException($"Failed to find identifier!!Expected: {SaCommon.NsiIdentifier}, observed:{observedIdentifier}");
                }

                // the fields are consumed sequentially, so this order mirrors the header layout
                var dataSourceVersion = DataSourceVersion.Read(headerReader);
                var genomeAssembly    = (GenomeAssembly)headerReader.ReadByte();
                string key            = headerReader.ReadAsciiString();
                var reportTarget      = (ReportFor)headerReader.ReadByte();
                int schema            = headerReader.ReadInt32();

                // the guard value closes the header
                uint observedGuard = headerReader.ReadUInt32();
                bool hasExpectedGuard = observedGuard == SaCommon.GuardInt;
                if (!hasExpectedGuard)
                {
                    throw new InvalidDataException($"Failed to find guard int!!Expected: {SaCommon.GuardInt}, observed:{observedGuard}");
                }

                return (dataSourceVersion, genomeAssembly, key, reportTarget, schema);
            }
        }
Ejemplo n.º 2
0
        //constructors
        /// <summary>
        /// populates the item from the binary reader; a null reader leaves every member untouched
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the item, or null</param>
        public CosmicItem(ExtendedBinaryReader reader)
        {
            if (reader == null)
            {
                return;
            }

            ID          = reader.ReadAsciiString();
            SaAltAllele = reader.ReadAsciiString();
            // the alternate allele is derived from the stored allele rather than read
            AltAllele   = SupplementaryAnnotationUtilities.ReverseSaReducedAllele(SaAltAllele);
            RefAllele   = reader.ReadAsciiString();
            Gene        = reader.ReadAsciiString();
            SampleCount = reader.ReadOptNullableInt32();

            int studyCount = reader.ReadOptInt32();

            // Studies is only created when at least one study follows
            if (studyCount <= 0)
            {
                return;
            }

            Studies = new HashSet<CosmicStudy>();

            for (int studyIndex = 0; studyIndex < studyCount; studyIndex++)
            {
                Studies.Add(new CosmicStudy(reader));
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// reads five ASCII strings from the reader and stores them, in order, in
 /// NumEvsSamples, EvsCoverage, EvsAfr, EvsAll and EvsEur
 /// </summary>
 /// <param name="reader">binary reader positioned at the first of the five strings</param>
 public void Read(ExtendedBinaryReader reader)
 {
     // each call consumes the next string from the reader, so the order of these
     // assignments decides which string ends up in which member
     NumEvsSamples = reader.ReadAsciiString();
     EvsCoverage   = reader.ReadAsciiString();
     EvsAfr        = reader.ReadAsciiString();
     EvsAll        = reader.ReadAsciiString();
     EvsEur        = reader.ReadAsciiString();
 }
Ejemplo n.º 4
0
        /// <summary>
        /// reads a reference metadata entry: the UCSC name, the Ensembl name and one boolean whose value is discarded
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the entry</param>
        /// <returns>a new ReferenceMetadata built from the two names</returns>
        public static ReferenceMetadata Read(ExtendedBinaryReader reader)
        {
            string ucsc    = reader.ReadAsciiString();
            string ensembl = reader.ReadAsciiString();

            // TODO: Remove this when we update the reference file format
            // (the value is unused, but the read still moves the reader past the field)
            _ = reader.ReadBoolean();

            var metadata = new ReferenceMetadata(ucsc, ensembl);
            return metadata;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// deserializes the UCSC and Ensembl names of a reference sequence
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the entry</param>
        /// <returns>a new ReferenceMetadata holding both names</returns>
        public static ReferenceMetadata Read(ExtendedBinaryReader reader)
        {
            var nameUcsc    = reader.ReadAsciiString();
            var nameEnsembl = reader.ReadAsciiString();

            // a boolean follows the names; it is consumed and its value is ignored
            _ = reader.ReadBoolean();

            return new ReferenceMetadata(nameUcsc, nameEnsembl);
        }
        /// <summary>
        /// reads the name, version, release date ticks and description from the reader
        /// and precomputes the hash code
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the data source version</param>
        internal DataSourceVersion(ExtendedBinaryReader reader)
        {
            Name             = reader.ReadAsciiString();
            Version          = reader.ReadAsciiString();
            ReleaseDateTicks = reader.ReadOptInt64();
            Description      = reader.ReadAsciiString();

            // the description does not take part in the hash code
            int nameHash    = Name.GetHashCode();
            int versionHash = Version.GetHashCode();
            int dateHash    = ReleaseDateTicks.GetHashCode();

            _hashCode = nameHash ^ versionHash ^ dateHash;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// reads a data source version (name, version, release date ticks, description) from the reader
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the data source version</param>
        /// <returns>a new DataSourceVersion built from the four fields</returns>
        public static IDataSourceVersion Read(ExtendedBinaryReader reader)
        {
            string sourceName    = reader.ReadAsciiString();
            string sourceVersion = reader.ReadAsciiString();
            long   releaseTicks  = reader.ReadOptInt64();
            string sourceSummary = reader.ReadAsciiString();

            var dataSourceVersion = new DataSourceVersion(sourceName, sourceVersion, releaseTicks, sourceSummary);
            return dataSourceVersion;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// reads a chromosome entry: four names/accessions followed by the length and the reference index
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the chromosome entry</param>
        /// <returns>a new Chromosome built from the fields that were read</returns>
        public static IChromosome Read(ExtendedBinaryReader reader)
        {
            // names and accessions come first, all stored as ASCII strings
            var ucsc    = reader.ReadAsciiString();
            var ensembl = reader.ReadAsciiString();
            var refSeq  = reader.ReadAsciiString();
            var genBank = reader.ReadAsciiString();

            // the numeric fields close the entry
            var chromosomeLength = reader.ReadOptInt32();
            var referenceIndex   = reader.ReadOptUInt16();

            return new Chromosome(ucsc, ensembl, refSeq, genBank, chromosomeLength, referenceIndex);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// populates the interval from the reader: chromosome names and index, start, end and the JSON string
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the interval</param>
        public SuppInterval(ExtendedBinaryReader reader)
        {
            // the Ensembl name is read before the UCSC name, while the Chromosome
            // constructor takes the UCSC name first
            var nameEnsembl    = reader.ReadAsciiString();
            var nameUcsc       = reader.ReadAsciiString();
            var referenceIndex = reader.ReadOptUInt16();

            Chromosome = new Chromosome(nameUcsc, nameEnsembl, referenceIndex);

            Start = reader.ReadOptInt32();
            End   = reader.ReadOptInt32();

            // unlike the chromosome names, the JSON string is read with ReadString
            _jsonString = reader.ReadString();
        }
Ejemplo n.º 10
0
        /// <summary>
        /// reads a supplementary interval: chromosome names and index, start, end and the JSON string
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the interval</param>
        /// <returns>a new SuppInterval built from the fields that were read</returns>
        public static SuppInterval Read(ExtendedBinaryReader reader)
        {
            // the Ensembl name is read before the UCSC name
            var nameEnsembl    = reader.ReadAsciiString();
            var nameUcsc       = reader.ReadAsciiString();
            var referenceIndex = reader.ReadOptUInt16();

            // accessions are not part of the interval, so they are passed as null and the length as 1
            var intervalChromosome = new Chromosome(nameUcsc, nameEnsembl, null, null, 1, referenceIndex);

            int    begin = reader.ReadOptInt32();
            int    stop  = reader.ReadOptInt32();
            string json  = reader.ReadString();

            return new SuppInterval(intervalChromosome, begin, stop, json);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// reads an OMIM annotation: HGNC symbol, description, MIM number and the list of phenotypes
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the annotation</param>
        /// <returns>a new OmimAnnotation; the phenotype list is empty when the stored count is not positive</returns>
        public static OmimAnnotation Read(ExtendedBinaryReader reader)
        {
            string hgncSymbol  = reader.ReadAsciiString();
            string summary     = reader.ReadAsciiString();
            long   mim         = reader.ReadOptInt64();
            int    numExpected = reader.ReadOptInt32();

            var phenotypeList = new List<Phenotype>();

            // one Phenotype record follows for each counted entry
            for (int remaining = numExpected; remaining > 0; remaining--)
            {
                phenotypeList.Add(Phenotype.ReadPhenotype(reader));
            }

            return new OmimAnnotation(hgncSymbol, summary, mim, phenotypeList);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// replaces the cached annotations with the position to global major allele entries
        /// stored for the given chromosome
        /// </summary>
        /// <param name="chrom">chromosome whose entries should be loaded</param>
        private void PreLoad(IChromosome chrom)
        {
            // the cache is reset even when the index has nothing for this chromosome
            _annotations.Clear();
            _chromosome = chrom;

            (long fileOffset, int byteCount, int entryCount) = _index.GetFileRange(chrom.Index);

            // a start location of -1 means there is nothing to load
            if (fileOffset == -1)
            {
                return;
            }

            // pull the whole range into memory and decode it from there
            _reader.BaseStream.Position = fileOffset;

            using (var rangeStream = new MemoryStream(_reader.ReadBytes(byteCount)))
            using (var rangeReader = new ExtendedBinaryReader(rangeStream))
            {
                for (int remaining = entryCount; remaining > 0; remaining--)
                {
                    int position = rangeReader.ReadOptInt32();
                    _annotations[position] = rangeReader.ReadAsciiString();
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// builds an NgaReader from the stream: the header is read first, then the Zstandard block
        /// stream is decoded into a gene symbol to JSON strings dictionary
        /// </summary>
        /// <param name="stream">stream positioned at the start of the header</param>
        /// <returns>a new NgaReader holding the header values and the decoded dictionary</returns>
        public static NgaReader Read(Stream stream)
        {
            (IDataSourceVersion version, string jsonKey, bool isArray) = ReadHeader(stream);

            Dictionary<string, List<string>> jsonStringsByGene;

            using (var decompressedStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress))
            using (var blockReader = new ExtendedBinaryReader(decompressedStream))
            {
                int numGenes = blockReader.ReadOptInt32();
                jsonStringsByGene = new Dictionary<string, List<string>>(numGenes);

                for (int geneIndex = 0; geneIndex < numGenes; geneIndex++)
                {
                    string symbol      = blockReader.ReadAsciiString();
                    int    entryCount  = blockReader.ReadOptInt32();
                    var    jsonStrings = new List<string>(entryCount);

                    // the list starts empty, so this runs exactly entryCount times
                    while (jsonStrings.Count < entryCount)
                    {
                        jsonStrings.Add(blockReader.ReadString());
                    }

                    // a repeated gene symbol overwrites the earlier entry
                    jsonStringsByGene[symbol] = jsonStrings;
                }
            }

            return new NgaReader(version, jsonKey, isArray, jsonStringsByGene);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// loads the index from the stream: the header fields first, then the blocks of every chromosome
        /// </summary>
        /// <param name="stream">stream whose remaining content is copied into memory and parsed</param>
        public NsaIndex(Stream stream)
        {
            using (var indexStream = new MemoryStream())
            using (var indexReader = new ExtendedBinaryReader(indexStream))
            {
                // copy everything that is left in the stream, then rewind to parse it
                stream.CopyTo(indexStream);
                indexStream.Position = 0;

                Assembly      = (GenomeAssembly)indexReader.ReadByte();
                Version       = DataSourceVersion.Read(indexReader);
                JsonKey       = indexReader.ReadAsciiString();
                MatchByAllele = indexReader.ReadBoolean();
                IsArray       = indexReader.ReadBoolean();
                SchemaVersion = indexReader.ReadOptInt32();
                IsPositional  = indexReader.ReadBoolean();

                int numChromosomes = indexReader.ReadOptInt32();
                _chromBlocks = new Dictionary<ushort, List<NsaIndexBlock>>(numChromosomes);

                for (int chromNumber = 0; chromNumber < numChromosomes; chromNumber++)
                {
                    ushort refIndex  = indexReader.ReadOptUInt16();
                    int    numBlocks = indexReader.ReadOptInt32();

                    // the list is registered before it is filled, then filled through the local reference
                    var blocks = new List<NsaIndexBlock>(numBlocks);
                    _chromBlocks[refIndex] = blocks;

                    for (int blockNumber = 0; blockNumber < numBlocks; blockNumber++)
                    {
                        blocks.Add(new NsaIndexBlock(indexReader));
                    }
                }
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// loads the index from the stream: the header fields first, then the chunks of every chromosome
        /// </summary>
        /// <param name="stream">stream positioned at the start of the index; at most 1048576 bytes are read from it</param>
        public ChunkedIndex(Stream stream)
        {
            // upper bound on the number of index bytes that are read
            const int maxIndexLength = 1048576;
            var buffer = new byte[maxIndexLength];

            // Stream.Read may hand back fewer bytes than requested even before the end of the
            // stream, so keep reading until the buffer is full or the stream reports no more data
            var indexLength = 0;
            int numBytesRead;
            while (indexLength < buffer.Length &&
                   (numBytesRead = stream.Read(buffer, indexLength, buffer.Length - indexLength)) > 0)
            {
                indexLength += numBytesRead;
            }

            using (var memStream = new MemoryStream(buffer, 0, indexLength))
            using (var memReader = new ExtendedBinaryReader(memStream))
            {
                Assembly      = (GenomeAssembly)memReader.ReadByte();
                Version       = DataSourceVersion.Read(memReader);
                JsonKey       = memReader.ReadAsciiString();
                MatchByAllele = memReader.ReadBoolean();
                IsArray       = memReader.ReadBoolean();
                SchemaVersion = memReader.ReadOptInt32();
                IsPositional  = memReader.ReadBoolean();

                var chromCount = memReader.ReadOptInt32();
                _chromChunks = new Dictionary<ushort, List<Chunk>>(chromCount);

                for (var i = 0; i < chromCount; i++)
                {
                    var chromIndex = memReader.ReadOptUInt16();
                    var chunkCount = memReader.ReadOptInt32();

                    // register the list first, then fill it through the local reference
                    var chunks = new List<Chunk>(chunkCount);
                    _chromChunks[chromIndex] = chunks;

                    for (var j = 0; j < chunkCount; j++)
                    {
                        chunks.Add(new Chunk(memReader));
                    }
                }
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// writes three strings with TranscriptCacheWriter.WriteItems and checks that
        /// TranscriptCacheReader.ReadItems returns the same strings
        /// </summary>
        public void ReadItems_EndToEnd()
        {
            var names = new[] { "Huey", "Duey", "Louie" };

            string[] roundTripped;

            using (var buffer = new MemoryStream())
            {
                // ReSharper disable AccessToDisposedClosure
                using (var itemWriter = new ExtendedBinaryWriter(buffer, Encoding.UTF8, true))
                {
                    TranscriptCacheWriter.WriteItems(itemWriter, names, name => itemWriter.WriteOptAscii(name));
                }

                // rewind so the reader starts at the first written byte
                buffer.Position = 0;

                using (var itemReader = new ExtendedBinaryReader(buffer))
                {
                    roundTripped = TranscriptCacheReader.ReadItems(itemReader, () => itemReader.ReadAsciiString());
                }
                // ReSharper restore AccessToDisposedClosure
            }

            Assert.NotNull(roundTripped);
            Assert.Equal(names, roundTripped);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// reads an RNA edit: start, end and the bases
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the RNA edit</param>
        /// <returns>a new RnaEdit built from the three fields</returns>
        public static IRnaEdit Read(ExtendedBinaryReader reader)
        {
            var begin    = reader.ReadOptInt32();
            var stop     = reader.ReadOptInt32();
            var sequence = reader.ReadAsciiString();

            var rnaEdit = new RnaEdit(begin, stop, sequence);
            return rnaEdit;
        }
Ejemplo n.º 18
0
        // ReSharper disable once UnusedParameter.Local
        // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Local
        /// <summary>
        /// reads the next ASCII string from the reader and asserts that it equals the expected string
        /// </summary>
        /// <param name="reader">binary reader positioned at the string to check</param>
        /// <param name="expectedString">string the reader is expected to produce</param>
        private static void CheckString(ExtendedBinaryReader reader, string expectedString)
        {
            var observedString = reader.ReadAsciiString();

            Assert.NotNull(observedString);

            // the length is compared before the content
            Assert.Equal(expectedString.Length, observedString.Length);
            Assert.Equal(expectedString, observedString);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// populates the item from the reader; the fields are consumed strictly in the order listed below
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the item</param>
        public ClinVarItem(ExtendedBinaryReader reader)
        {
            _alleleOrigins = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();
            SaAltAllele    = reader.ReadAsciiString();

            // the alternate allele is derived from the stored allele; a missing one becomes an empty string
            if (SaAltAllele != null)
            {
                AltAllele = SupplementaryAnnotationUtilities.ReverseSaReducedAllele(SaAltAllele);
            }
            else
            {
                AltAllele = "";
            }

            ReferenceAllele  = reader.ReadAsciiString();
            ID               = reader.ReadAsciiString();
            ReviewStatus     = (ReviewStatusEnum)reader.ReadByte();
            IsAlleleSpecific = reader.ReadAsciiString();

            // each list stays null when ReadOptArray returns null
            _medgenIds   = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();
            _omimIds     = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();
            _orphanetIds = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();

            // phenotypes are the only strings read as UTF-8 instead of ASCII
            _phenotypes = reader.ReadOptArray(reader.ReadUtf8String)?.ToList();

            Significance    = reader.ReadAsciiString();
            LastUpdatedDate = reader.ReadOptInt64();
            _pubmedIds      = reader.ReadOptArray(reader.ReadOptInt64)?.ToList();
        }
        /// <summary>
        /// reads the conservation scores of one transcript: the ID, the number of scores and the score bytes
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the entry</param>
        /// <returns>the scores that were read, or null when IsEmpty() reports them as empty</returns>
        public static TranscriptConservationScores Read(ExtendedBinaryReader reader)
        {
            string transcriptId = reader.ReadAsciiString();
            int    numScores    = reader.ReadOptInt32();
            byte[] scoreBytes   = reader.ReadBytes(numScores);

            var conservationScores = new TranscriptConservationScores(transcriptId, scoreBytes);

            if (conservationScores.IsEmpty())
            {
                return null;
            }

            return conservationScores;
        }
        /// <summary>
        /// fills the supplementary annotation position from the reader: the global allele fields first,
        /// then the allele-specific, COSMIC, ClinVar and custom annotation records
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the position record</param>
        /// <param name="sa">position object that receives the values; its collections are added to, not replaced</param>
        public static void Read(ExtendedBinaryReader reader, SupplementaryAnnotationPosition sa)
        {
            sa.GlobalMinorAllele          = reader.ReadAsciiString();
            sa.GlobalMinorAlleleFrequency = reader.ReadAsciiString();
            sa.GlobalMajorAllele          = reader.ReadAsciiString();
            sa.GlobalMajorAlleleFrequency = reader.ReadAsciiString();

            // allele-specific records: the allele comes first, then its annotation
            int alleleCount = reader.ReadOptInt32();

            for (int remaining = alleleCount; remaining > 0; remaining--)
            {
                string alleleKey  = reader.ReadAsciiString();
                var    annotation = AlleleSpecificAnnotation.Read(reader);
                sa.AlleleSpecificAnnotations[alleleKey] = annotation;
            }

            // cosmic records
            int cosmicCount = reader.ReadOptInt32();

            for (int remaining = cosmicCount; remaining > 0; remaining--)
            {
                var cosmic = new CosmicItem(reader);
                sa.CosmicItems.Add(cosmic);
            }

            // clinVar records
            int clinVarCount = reader.ReadOptInt32();

            for (int remaining = clinVarCount; remaining > 0; remaining--)
            {
                var clinVar = new ClinVarItem(reader);
                sa.ClinVarItems.Add(clinVar);
            }

            // custom annotation records
            int customCount = reader.ReadOptInt32();

            for (int remaining = customCount; remaining > 0; remaining--)
            {
                var custom = new CustomItem(reader);
                sa.CustomItems.Add(custom);
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// reads the given number of ASCII strings from the reader
        /// </summary>
        /// <param name="reader">binary reader positioned at the first string</param>
        /// <param name="count">number of strings to read; nothing is read when it is not positive</param>
        /// <returns>the strings in the order they were read</returns>
        private static List<string> ReadStringLists(ExtendedBinaryReader reader, int count)
        {
            var strings = new List<string>();

            // the list starts empty, so this runs exactly count times
            while (strings.Count < count)
            {
                strings.Add(reader.ReadAsciiString());
            }

            return strings;
        }
Ejemplo n.º 23
0
        /// <summary>
        /// reads the footer from _reader and verifies that it is the expected "EOF" marker
        /// </summary>
        /// <exception cref="GeneralException">the footer that was read is not "EOF"</exception>
        private void CheckFooter()
        {
            const string expectedFooter = "EOF";
            string observedFooter = _reader.ReadAsciiString();

            if (observedFooter == expectedFooter)
            {
                return;
            }

            throw new GeneralException($"The footer check failed for the OMIM databses ({_omimFile}): ID: exp: {expectedFooter} obs: {observedFooter}");
        }
Ejemplo n.º 24
0
            /// <summary>
            /// reads a phenotype: MIM number, phenotype text, mapping, comments and the optional inheritance values
            /// </summary>
            /// <param name="reader">binary reader positioned at the start of the phenotype</param>
            /// <returns>a new Phenotype; its inheritance set is null when ReadOptArray returns null</returns>
            public static Phenotype ReadPhenotype(ExtendedBinaryReader reader)
            {
                int    mim          = reader.ReadOptInt32();
                string description  = reader.ReadAsciiString();
                var    mappingKey   = (Mapping)reader.ReadByte();
                var    commentsKey  = (Comments)reader.ReadByte();
                var    inheritModes = reader.ReadOptArray(reader.ReadAsciiString);

                // a set is only built when an array was actually read
                HashSet<string> inheritanceSet = null;
                if (inheritModes != null)
                {
                    inheritanceSet = new HashSet<string>(inheritModes);
                }

                return new Phenotype(mim, description, mappingKey, commentsKey, inheritanceSet);
            }
Ejemplo n.º 25
0
        /// <summary>
        /// reads every section of the cache through _reader and bundles the results, together with Header,
        /// into a TranscriptCacheData
        /// </summary>
        /// <param name="refIndexToChromosome">reference index to chromosome lookup handed to the gene, regulatory region and transcript readers</param>
        /// <returns>the cache data assembled from the six sections</returns>
        public TranscriptCacheData Read(IDictionary<ushort, IChromosome> refIndexToChromosome)
        {
            // the sections are consumed in exactly this order; transcripts come last because they
            // receive the genes, transcript regions, miRNAs and peptide sequences read before them
            var geneItems       = ReadItems(_reader, () => Gene.Read(_reader, refIndexToChromosome));
            var regionItems     = ReadItems(_reader, () => TranscriptRegion.Read(_reader));
            var mirnaItems      = ReadItems(_reader, () => Interval.Read(_reader));
            var peptideItems    = ReadItems(_reader, () => _reader.ReadAsciiString());
            var regulatoryItems = ReadIntervals(_reader, () => RegulatoryRegion.Read(_reader, refIndexToChromosome));
            var transcriptItems = ReadIntervals(
                _reader,
                () => Transcript.Read(_reader, refIndexToChromosome, geneItems, regionItems, mirnaItems, peptideItems));

            return new TranscriptCacheData(
                Header,
                geneItems,
                regionItems,
                mirnaItems,
                peptideItems,
                transcriptItems,
                regulatoryItems);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// reads a counted array of ASCII strings from _reader and then runs CheckGuard
        /// </summary>
        /// <returns>the strings in the order they were read</returns>
        private string[] ReadStringArray()
        {
            int count  = _reader.ReadOptInt32();
            var result = new string[count];

            var index = 0;
            while (index < count)
            {
                result[index] = _reader.ReadAsciiString();
                index++;
            }

            // the guard check runs once, after the last string
            CheckGuard();

            return result;
        }
Ejemplo n.º 27
0
        /// <summary>
        /// loads the supplementary intervals from the stream: the compressed content is read and
        /// decompressed in memory, the header is validated and the intervals are grouped per chromosome index
        /// </summary>
        /// <param name="stream">stream holding the Zstandard-compressed data; at most MaxStreamLength bytes are read</param>
        /// <exception cref="UserErrorException">the stored schema version differs from SaCommon.SchemaVersion</exception>
        public NsiReader(Stream stream)
        {
            _stream = stream;

            // Stream.Read may hand back fewer bytes than requested even before the end of the
            // stream, so keep reading until the buffer is full or the stream reports no more data
            var compressData = new byte[MaxStreamLength];
            var length       = 0;
            int numBytesRead;
            while (length < compressData.Length &&
                   (numBytesRead = stream.Read(compressData, length, compressData.Length - length)) > 0)
            {
                length += numBytesRead;
            }

            //uncompress
            var zstd = new Zstandard();
            var decompressedLength = zstd.GetDecompressedLength(compressData, length);
            var decompressedData   = new byte[decompressedLength];

            zstd.Decompress(compressData, length, decompressedData, decompressedLength);

            using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
            using (var memReader = new ExtendedBinaryReader(memStream))
            {
                Version   = DataSourceVersion.Read(memReader);
                Assembly  = (GenomeAssembly)memReader.ReadByte();
                JsonKey   = memReader.ReadAsciiString();
                ReportFor = (ReportFor)memReader.ReadByte();
                int schemaVersion = memReader.ReadOptInt32();

                if (schemaVersion != SaCommon.SchemaVersion)
                {
                    throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
                }

                // group the intervals by chromosome index, keeping the order in which they were read
                int count         = memReader.ReadOptInt32();
                var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();

                for (var i = 0; i < count; i++)
                {
                    var saInterval = new SuppInterval(memReader);
                    var index      = saInterval.Chromosome.Index;

                    if (!suppIntervals.TryGetValue(index, out var chromIntervals))
                    {
                        chromIntervals       = new List<Interval<string>>();
                        suppIntervals[index] = chromIntervals;
                    }

                    chromIntervals.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
                }

                // freeze every per-chromosome list into an interval array
                _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
                foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
                {
                    _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
                }
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// deserializes a gene: reference index, coordinates, strand, symbol and the identifiers
        /// </summary>
        /// <param name="reader">binary reader positioned at the start of the gene</param>
        /// <returns>a new Gene built from the fields that were read</returns>
        public static Gene Read(ExtendedBinaryReader reader)
        {
            // location of the gene
            var refIndex   = reader.ReadUInt16();
            var begin      = reader.ReadOptInt32();
            var stop       = reader.ReadOptInt32();
            var isReversed = reader.ReadBoolean();

            // symbol and identifiers
            var geneSymbol = reader.ReadAsciiString();
            var hgnc       = reader.ReadOptInt32();
            var entrez     = CompactId.Read(reader);
            var ensembl    = CompactId.Read(reader);
            var mim        = reader.ReadOptInt32();

            return new Gene(refIndex, begin, stop, isReversed, geneSymbol, hgnc, entrez, ensembl, mim);
        }
Ejemplo n.º 29
0
        /// <summary>
        /// loads the index from the stream: the version check first, then the chromosome indices,
        /// the synonym table and the section ranges
        /// </summary>
        /// <param name="stream">stream positioned at the start of the index; it is also kept in _stream</param>
        /// <exception cref="InvalidDataException">the stored version differs from JasixCommons.Version</exception>
        public JasixIndex(Stream stream) : this()
        {
            _stream = stream;

            // NOTE(review): the reader is disposed when this constructor ends while _stream keeps
            // referring to the same stream; whether that closes the stream depends on
            // ExtendedBinaryReader - confirm
            using (var indexReader = new ExtendedBinaryReader(stream))
            {
                int observedVersion = indexReader.ReadOptInt32();
                if (observedVersion != JasixCommons.Version)
                {
                    throw new InvalidDataException($"Invalid Jasix version: Observed {observedVersion}, expected{JasixCommons.Version}");
                }

                // per-chromosome indices, keyed by their reference sequence
                int numChromosomes = indexReader.ReadOptInt32();

                for (int remaining = numChromosomes; remaining > 0; remaining--)
                {
                    var chromosomeIndex = new JasixChrIndex(indexReader);
                    _chrIndices[chromosomeIndex.ReferenceSequence] = chromosomeIndex;
                }

                // synonym to index name pairs
                int numSynonyms = indexReader.ReadOptInt32();

                for (int remaining = numSynonyms; remaining > 0; remaining--)
                {
                    string alias  = indexReader.ReadAsciiString();
                    string target = indexReader.ReadAsciiString();
                    _synonymToChrName[alias] = target;
                }

                // named sections with their begin and end values
                int numSections = indexReader.ReadOptInt32();

                for (int remaining = numSections; remaining > 0; remaining--)
                {
                    string name       = indexReader.ReadAsciiString();
                    long   rangeBegin = indexReader.ReadOptInt64();
                    long   rangeEnd   = indexReader.ReadOptInt64();
                    _sectionRanges[name] = new FileRange(rangeBegin, rangeEnd);
                }
            }
        }
Ejemplo n.º 30
0
        /// <summary>
        /// reads a counted array of cytogenetic bands; each band is stored as begin, end and name
        /// </summary>
        /// <param name="reader">binary reader positioned at the band count</param>
        /// <returns>the bands in the order they were read</returns>
        private static Band[] GetCytogeneticBands(ExtendedBinaryReader reader)
        {
            var bandCount = reader.ReadOptInt32();
            var result    = new Band[bandCount];

            var bandIndex = 0;
            while (bandIndex < bandCount)
            {
                var bandBegin = reader.ReadOptInt32();
                var bandEnd   = reader.ReadOptInt32();
                var bandName  = reader.ReadAsciiString();

                result[bandIndex] = new Band(bandBegin, bandEnd, bandName);
                bandIndex++;
            }

            return result;
        }