/// <summary>
/// Reads the header fields from the stream and validates the leading identifier
/// and the trailing guard value against the constants in SaCommon.
/// </summary>
/// <param name="stream">stream positioned at the start of the header</param>
/// <returns>the data source version, genome assembly, JSON key, report-for value and schema version (in that order)</returns>
/// <exception cref="InvalidDataException">thrown when the identifier or the guard value does not match the expected constant</exception>
private static (IDataSourceVersion, GenomeAssembly, string, ReportFor, int) ReadHeader(Stream stream)
{
    // NOTE(review): the third constructor argument is presumably "leaveOpen", so the
    // caller's stream should survive the disposal of this reader - confirm
    using (var headerReader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))
    {
        string observedIdentifier = headerReader.ReadAsciiString();
        if (observedIdentifier != SaCommon.NsiIdentifier)
        {
            throw new InvalidDataException($"Failed to find identifier!!Expected: {SaCommon.NsiIdentifier}, observed:{observedIdentifier}");
        }

        var dataSourceVersion = DataSourceVersion.Read(headerReader);
        var genomeAssembly    = (GenomeAssembly)headerReader.ReadByte();
        string key            = headerReader.ReadAsciiString();
        var reportTarget      = (ReportFor)headerReader.ReadByte();
        int schema            = headerReader.ReadInt32();

        // the guard value terminates the header
        uint observedGuard = headerReader.ReadUInt32();
        if (observedGuard != SaCommon.GuardInt)
        {
            throw new InvalidDataException($"Failed to find guard int!!Expected: {SaCommon.GuardInt}, observed:{observedGuard}");
        }

        return (dataSourceVersion, genomeAssembly, key, reportTarget, schema);
    }
}
// constructors

/// <summary>
/// Populates a COSMIC item from the binary reader. The fields are read in the
/// order: ID, reduced alt allele, ref allele, gene, sample count, studies.
/// </summary>
/// <param name="reader">source reader; when null, nothing is read and all members keep their defaults</param>
public CosmicItem(ExtendedBinaryReader reader)
{
    if (reader == null)
    {
        return;
    }

    ID          = reader.ReadAsciiString();
    SaAltAllele = reader.ReadAsciiString();
    AltAllele   = SupplementaryAnnotationUtilities.ReverseSaReducedAllele(SaAltAllele);
    RefAllele   = reader.ReadAsciiString();
    Gene        = reader.ReadAsciiString();
    SampleCount = reader.ReadOptNullableInt32();

    int studyCount = reader.ReadOptInt32();

    // Studies is only allocated when at least one study follows
    if (studyCount <= 0)
    {
        return;
    }

    Studies = new HashSet<CosmicStudy>();
    for (var studyIndex = 0; studyIndex < studyCount; studyIndex++)
    {
        Studies.Add(new CosmicStudy(reader));
    }
}
/// <summary>
/// Fills the EVS members of this instance from the binary reader. Five ASCII
/// strings are consumed, in this order: number of samples, coverage, AFR, ALL, EUR.
/// </summary>
/// <param name="reader">reader positioned at the first EVS field</param>
public void Read(ExtendedBinaryReader reader)
{
    // the read order mirrors the on-disk layout and must not be changed
    NumEvsSamples = reader.ReadAsciiString();
    EvsCoverage   = reader.ReadAsciiString();
    EvsAfr        = reader.ReadAsciiString();
    EvsAll        = reader.ReadAsciiString();
    EvsEur        = reader.ReadAsciiString();
}
/// <summary>
/// Reads one reference metadata entry (UCSC name followed by Ensembl name) from the binary reader.
/// </summary>
/// <param name="reader">reader positioned at the start of the entry</param>
/// <returns>a new ReferenceMetadata built from the two names</returns>
public static ReferenceMetadata Read(ExtendedBinaryReader reader)
{
    string ucsc    = reader.ReadAsciiString();
    string ensembl = reader.ReadAsciiString();

    // TODO: Remove this when we update the reference file format
    // (the boolean is consumed and its value is discarded)
    reader.ReadBoolean();

    return new ReferenceMetadata(ucsc, ensembl);
}
/// <summary>
/// Deserializes a reference metadata entry: a UCSC name, an Ensembl name and a
/// trailing boolean whose value is not used.
/// </summary>
/// <param name="reader">reader positioned at the start of the entry</param>
/// <returns>a new ReferenceMetadata holding the two names</returns>
public static ReferenceMetadata Read(ExtendedBinaryReader reader)
{
    var ucsc    = reader.ReadAsciiString();
    var ensembl = reader.ReadAsciiString();

    // the boolean still has to be consumed so the reader ends up past this entry
    reader.ReadBoolean();

    return new ReferenceMetadata(ucsc, ensembl);
}
/// <summary>
/// Builds a data source version from the binary reader (name, version, release
/// date ticks, description) and pre-computes the hash code.
/// </summary>
/// <param name="reader">reader positioned at the start of the serialized version</param>
internal DataSourceVersion(ExtendedBinaryReader reader)
{
    Name             = reader.ReadAsciiString();
    Version          = reader.ReadAsciiString();
    ReleaseDateTicks = reader.ReadOptInt64();
    Description      = reader.ReadAsciiString();

    // the description does not take part in the hash code
    int nameHash    = Name.GetHashCode();
    int versionHash = Version.GetHashCode();
    int ticksHash   = ReleaseDateTicks.GetHashCode();

    _hashCode = nameHash ^ versionHash ^ ticksHash;
}
/// <summary>
/// Reads a serialized data source version: name, version, release date ticks and description.
/// </summary>
/// <param name="reader">reader positioned at the start of the serialized version</param>
/// <returns>a new DataSourceVersion built from the four fields</returns>
public static IDataSourceVersion Read(ExtendedBinaryReader reader)
{
    string sourceName    = reader.ReadAsciiString();
    string sourceVersion = reader.ReadAsciiString();
    long   releaseTicks  = reader.ReadOptInt64();
    string summary       = reader.ReadAsciiString();

    return new DataSourceVersion(sourceName, sourceVersion, releaseTicks, summary);
}
/// <summary>
/// Reads a chromosome record: UCSC name, Ensembl name, RefSeq accession,
/// GenBank accession, length and reference index.
/// </summary>
/// <param name="reader">reader positioned at the start of the record</param>
/// <returns>a new Chromosome built from the six fields</returns>
public static IChromosome Read(ExtendedBinaryReader reader)
{
    var ucsc      = reader.ReadAsciiString();
    var ensembl   = reader.ReadAsciiString();
    var refSeq    = reader.ReadAsciiString();
    var genBank   = reader.ReadAsciiString();
    var numBases  = reader.ReadOptInt32();
    var index     = reader.ReadOptUInt16();

    return new Chromosome(ucsc, ensembl, refSeq, genBank, numBases, index);
}
/// <summary>
/// Builds a supplementary interval from the binary reader. The chromosome names
/// and index come first, followed by start, end and the JSON string.
/// </summary>
/// <param name="reader">reader positioned at the start of the serialized interval</param>
public SuppInterval(ExtendedBinaryReader reader)
{
    // the Ensembl name precedes the UCSC name on disk,
    // whereas the Chromosome constructor takes the UCSC name first
    var ensembl = reader.ReadAsciiString();
    var ucsc    = reader.ReadAsciiString();
    var index   = reader.ReadOptUInt16();

    Chromosome  = new Chromosome(ucsc, ensembl, index);
    Start       = reader.ReadOptInt32();
    End         = reader.ReadOptInt32();
    _jsonString = reader.ReadString();
}
/// <summary>
/// Reads a supplementary interval: chromosome names and index, start, end and the JSON string.
/// </summary>
/// <param name="reader">reader positioned at the start of the serialized interval</param>
/// <returns>a new SuppInterval built from the fields that were read</returns>
public static SuppInterval Read(ExtendedBinaryReader reader)
{
    // the Ensembl name precedes the UCSC name on disk
    var ensembl = reader.ReadAsciiString();
    var ucsc    = reader.ReadAsciiString();
    var index   = reader.ReadOptUInt16();

    // accessions are not stored with the interval; null accessions and a length of 1 are passed
    var chrom = new Chromosome(ucsc, ensembl, null, null, 1, index);

    int    begin = reader.ReadOptInt32();
    int    finish = reader.ReadOptInt32();
    string json  = reader.ReadString();

    return new SuppInterval(chrom, begin, finish, json);
}
/// <summary>
/// Reads an OMIM annotation: HGNC symbol, description, MIM number and a
/// count-prefixed list of phenotypes.
/// </summary>
/// <param name="reader">reader positioned at the start of the annotation</param>
/// <returns>a new OmimAnnotation; the phenotype list is empty (not null) when the count is zero</returns>
public static OmimAnnotation Read(ExtendedBinaryReader reader)
{
    string hgncSymbol   = reader.ReadAsciiString();
    string summary      = reader.ReadAsciiString();
    long   mim          = reader.ReadOptInt64();
    int    numPhenotypes = reader.ReadOptInt32();

    var phenotypeList = new List<Phenotype>();
    for (var n = 0; n < numPhenotypes; n++)
    {
        phenotypeList.Add(Phenotype.ReadPhenotype(reader));
    }

    return new OmimAnnotation(hgncSymbol, summary, mim, phenotypeList);
}
/// <summary>
/// Replaces the cached annotations with the entries stored for the given chromosome.
/// The chromosome's byte range is looked up in the index, read into memory and
/// parsed as (position, global major allele) pairs.
/// </summary>
/// <param name="chrom">chromosome whose entries should be loaded</param>
private void PreLoad(IChromosome chrom)
{
    _annotations.Clear();
    _chromosome = chrom;

    var (fileOffset, byteCount, entryCount) = _index.GetFileRange(chrom.Index);

    // a start location of -1 means there is nothing to load; the cache stays empty
    if (fileOffset == -1)
    {
        return;
    }

    _reader.BaseStream.Position = fileOffset;
    byte[] rawBytes = _reader.ReadBytes(byteCount);

    using (var bufferStream = new MemoryStream(rawBytes))
    using (var bufferReader = new ExtendedBinaryReader(bufferStream))
    {
        for (var entry = 0; entry < entryCount; entry++)
        {
            int    pos         = bufferReader.ReadOptInt32();
            string majorAllele = bufferReader.ReadAsciiString();
            _annotations[pos]  = majorAllele;
        }
    }
}
/// <summary>
/// Creates an NgaReader from the stream: the header is read first, then the
/// Zstandard-compressed block stream is decoded into a gene symbol to JSON strings map.
/// </summary>
/// <param name="stream">stream positioned at the start of the header</param>
/// <returns>a new NgaReader holding the header values and the per-gene JSON strings</returns>
public static NgaReader Read(Stream stream)
{
    var (dataVersion, key, arrayFlag) = ReadHeader(stream);

    Dictionary<string, List<string>> jsonByGene;

    using (var compressedBlocks = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress))
    using (var blockReader = new ExtendedBinaryReader(compressedBlocks))
    {
        var numGenes = blockReader.ReadOptInt32();
        jsonByGene   = new Dictionary<string, List<string>>(numGenes);

        for (var geneIndex = 0; geneIndex < numGenes; geneIndex++)
        {
            var symbol     = blockReader.ReadAsciiString();
            var entryCount = blockReader.ReadOptInt32();

            var jsonStrings = new List<string>(entryCount);
            for (var entryIndex = 0; entryIndex < entryCount; entryIndex++)
            {
                jsonStrings.Add(blockReader.ReadString());
            }

            // a repeated gene symbol overwrites the earlier entry
            jsonByGene[symbol] = jsonStrings;
        }
    }

    return new NgaReader(dataVersion, key, arrayFlag, jsonByGene);
}
/// <summary>
/// Loads the index from the stream. The whole stream is copied into memory and
/// then parsed: header fields first, followed by the index blocks for each chromosome.
/// </summary>
/// <param name="stream">stream containing the serialized index</param>
public NsaIndex(Stream stream)
{
    using (var inMemory = new MemoryStream())
    using (var indexReader = new ExtendedBinaryReader(inMemory))
    {
        // pull every byte of the source stream into memory, then rewind before parsing
        stream.CopyTo(inMemory);
        inMemory.Position = 0;

        Assembly      = (GenomeAssembly)indexReader.ReadByte();
        Version       = DataSourceVersion.Read(indexReader);
        JsonKey       = indexReader.ReadAsciiString();
        MatchByAllele = indexReader.ReadBoolean();
        IsArray       = indexReader.ReadBoolean();
        SchemaVersion = indexReader.ReadOptInt32();
        IsPositional  = indexReader.ReadBoolean();

        int numChromosomes = indexReader.ReadOptInt32();
        _chromBlocks       = new Dictionary<ushort, List<NsaIndexBlock>>(numChromosomes);

        for (var chrom = 0; chrom < numChromosomes; chrom++)
        {
            ushort refIndex  = indexReader.ReadOptUInt16();
            int    numBlocks = indexReader.ReadOptInt32();

            // register the list first, then fill it through the local reference
            var blocks = new List<NsaIndexBlock>(numBlocks);
            _chromBlocks[refIndex] = blocks;

            for (var block = 0; block < numBlocks; block++)
            {
                blocks.Add(new NsaIndexBlock(indexReader));
            }
        }
    }
}
/// <summary>
/// Loads the index from the stream. The whole stream is copied into memory and
/// then parsed: header fields first, followed by the chunks for each chromosome.
/// </summary>
/// <remarks>
/// The index used to be fetched with a single Stream.Read call into a fixed 1 MiB
/// buffer. Stream.Read may return fewer bytes than requested even when more data is
/// available, and an index larger than the buffer was silently truncated. Copying
/// the stream to a MemoryStream (as NsaIndex does) avoids both problems.
/// </remarks>
/// <param name="stream">stream containing the serialized index; it is read to its end</param>
public ChunkedIndex(Stream stream)
{
    using (var memStream = new MemoryStream())
    {
        // read the complete index, however many Read calls that takes
        stream.CopyTo(memStream);
        memStream.Position = 0;

        using (var memReader = new ExtendedBinaryReader(memStream))
        {
            Assembly      = (GenomeAssembly)memReader.ReadByte();
            Version       = DataSourceVersion.Read(memReader);
            JsonKey       = memReader.ReadAsciiString();
            MatchByAllele = memReader.ReadBoolean();
            IsArray       = memReader.ReadBoolean();
            SchemaVersion = memReader.ReadOptInt32();
            IsPositional  = memReader.ReadBoolean();

            var chromCount = memReader.ReadOptInt32();
            _chromChunks   = new Dictionary<ushort, List<Chunk>>(chromCount);

            for (var i = 0; i < chromCount; i++)
            {
                var chromIndex = memReader.ReadOptUInt16();
                var chunkCount = memReader.ReadOptInt32();

                // register the list first, then fill it through the local reference
                var chunks = new List<Chunk>(chunkCount);
                _chromChunks[chromIndex] = chunks;

                for (var j = 0; j < chunkCount; j++)
                {
                    chunks.Add(new Chunk(memReader));
                }
            }
        }
    }
}
/// <summary>
/// Round-trip check: writes a string array with TranscriptCacheWriter.WriteItems and
/// expects TranscriptCacheReader.ReadItems to return the same strings.
/// </summary>
public void ReadItems_EndToEnd()
{
    var expected = new[] { "Huey", "Duey", "Louie" };
    string[] observed;

    using (var memoryStream = new MemoryStream())
    {
        // ReSharper disable AccessToDisposedClosure
        using (var writer = new ExtendedBinaryWriter(memoryStream, Encoding.UTF8, true))
        {
            TranscriptCacheWriter.WriteItems(writer, expected, item => writer.WriteOptAscii(item));
        }

        // rewind so the reader starts at the first written byte
        memoryStream.Position = 0;

        using (var reader = new ExtendedBinaryReader(memoryStream))
        {
            observed = TranscriptCacheReader.ReadItems(reader, () => reader.ReadAsciiString());
        }
        // ReSharper restore AccessToDisposedClosure
    }

    Assert.NotNull(observed);
    Assert.Equal(expected, observed);
}
/// <summary>
/// Reads an RNA edit: start, end and the bases string.
/// </summary>
/// <param name="reader">reader positioned at the start of the RNA edit</param>
/// <returns>a new RnaEdit built from the three fields</returns>
public static IRnaEdit Read(ExtendedBinaryReader reader)
{
    var begin      = reader.ReadOptInt32();
    var finish     = reader.ReadOptInt32();
    var editBases  = reader.ReadAsciiString();

    return new RnaEdit(begin, finish, editBases);
}
/// <summary>
/// Test helper: reads the next ASCII string from the reader and asserts that it is
/// non-null and equal to the expected string (the length is compared first).
/// </summary>
/// <param name="reader">reader positioned at the string to check</param>
/// <param name="expectedString">value the next string must match</param>
// ReSharper disable once UnusedParameter.Local
// ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Local
private static void CheckString(ExtendedBinaryReader reader, string expectedString)
{
    var observed = reader.ReadAsciiString();

    Assert.NotNull(observed);
    Assert.Equal(expectedString.Length, observed.Length);
    Assert.Equal(expectedString, observed);
}
/// <summary>
/// Populates a ClinVar item from the binary reader. The fields are consumed
/// strictly in the order they appear below.
/// </summary>
/// <param name="reader">reader positioned at the start of the serialized item</param>
public ClinVarItem(ExtendedBinaryReader reader)
{
    _alleleOrigins = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();

    SaAltAllele = reader.ReadAsciiString();

    // a missing reduced allele maps to the empty string instead of being reversed
    if (SaAltAllele != null)
    {
        AltAllele = SupplementaryAnnotationUtilities.ReverseSaReducedAllele(SaAltAllele);
    }
    else
    {
        AltAllele = "";
    }

    ReferenceAllele  = reader.ReadAsciiString();
    ID               = reader.ReadAsciiString();
    ReviewStatus     = (ReviewStatusEnum)reader.ReadByte();
    IsAlleleSpecific = reader.ReadAsciiString();

    // each optional array becomes a list, or stays null when ReadOptArray returns null
    _medgenIds   = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();
    _omimIds     = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();
    _orphanetIds = reader.ReadOptArray(reader.ReadAsciiString)?.ToList();

    // phenotypes are the only strings read as UTF-8 rather than ASCII
    _phenotypes = reader.ReadOptArray(reader.ReadUtf8String)?.ToList();

    Significance    = reader.ReadAsciiString();
    LastUpdatedDate = reader.ReadOptInt64();
    _pubmedIds      = reader.ReadOptArray(reader.ReadOptInt64)?.ToList();
}
/// <summary>
/// Reads transcript conservation scores: an ID, a byte count and that many score bytes.
/// </summary>
/// <param name="reader">reader positioned at the start of the record</param>
/// <returns>the scores that were read, or null when the resulting item reports itself as empty</returns>
public static TranscriptConservationScores Read(ExtendedBinaryReader reader)
{
    string transcriptId = reader.ReadAsciiString();
    int    numScores    = reader.ReadOptInt32();
    byte[] scoreBytes   = reader.ReadBytes(numScores);

    var conservationScores = new TranscriptConservationScores(transcriptId, scoreBytes);

    if (conservationScores.IsEmpty())
    {
        return null;
    }

    return conservationScores;
}
/// <summary>
/// Fills a supplementary annotation position from the binary reader: the four global
/// allele fields, followed by four count-prefixed sections (allele-specific
/// annotations, COSMIC items, ClinVar items and custom annotation items).
/// </summary>
/// <param name="reader">reader positioned at the start of the serialized position</param>
/// <param name="sa">position object that receives the values</param>
public static void Read(ExtendedBinaryReader reader, SupplementaryAnnotationPosition sa)
{
    // global allele fields
    sa.GlobalMinorAllele          = reader.ReadAsciiString();
    sa.GlobalMinorAlleleFrequency = reader.ReadAsciiString();
    sa.GlobalMajorAllele          = reader.ReadAsciiString();
    sa.GlobalMajorAlleleFrequency = reader.ReadAsciiString();

    // allele-specific records, keyed by allele
    int alleleCount = reader.ReadOptInt32();
    for (var n = 0; n < alleleCount; n++)
    {
        string alleleKey = reader.ReadAsciiString();
        var annotation   = AlleleSpecificAnnotation.Read(reader);
        sa.AlleleSpecificAnnotations[alleleKey] = annotation;
    }

    // COSMIC records
    int cosmicCount = reader.ReadOptInt32();
    for (var n = 0; n < cosmicCount; n++)
    {
        var cosmic = new CosmicItem(reader);
        sa.CosmicItems.Add(cosmic);
    }

    // ClinVar records
    int clinVarCount = reader.ReadOptInt32();
    for (var n = 0; n < clinVarCount; n++)
    {
        var clinVar = new ClinVarItem(reader);
        sa.ClinVarItems.Add(clinVar);
    }

    // custom annotation records
    int customCount = reader.ReadOptInt32();
    for (var n = 0; n < customCount; n++)
    {
        var custom = new CustomItem(reader);
        sa.CustomItems.Add(custom);
    }
}
/// <summary>
/// Reads the requested number of ASCII strings from the reader.
/// </summary>
/// <param name="reader">reader positioned at the first string</param>
/// <param name="count">number of strings to read; nothing is read when it is zero or negative</param>
/// <returns>the strings in the order they were read (an empty list when nothing was read)</returns>
private static List<string> ReadStringLists(ExtendedBinaryReader reader, int count)
{
    var result = new List<string>();

    var remaining = count;
    while (remaining > 0)
    {
        result.Add(reader.ReadAsciiString());
        remaining--;
    }

    return result;
}
/// <summary>
/// Reads the next ASCII string from the reader and verifies that it is the "EOF" footer.
/// </summary>
/// <exception cref="GeneralException">thrown when the observed footer differs from the expected one</exception>
private void CheckFooter()
{
    const string expectedFooter = "EOF";

    string observedFooter = _reader.ReadAsciiString();
    if (observedFooter == expectedFooter)
    {
        return;
    }

    throw new GeneralException($"The footer check failed for the OMIM databses ({_omimFile}): ID: exp: {expectedFooter} obs: {observedFooter}");
}
/// <summary>
/// Reads a phenotype: MIM number, phenotype text, mapping byte, comments byte and
/// an optional array of inheritance strings.
/// </summary>
/// <param name="reader">reader positioned at the start of the phenotype</param>
/// <returns>a new Phenotype; its inheritance set is null when ReadOptArray returns null</returns>
public static Phenotype ReadPhenotype(ExtendedBinaryReader reader)
{
    int    mim         = reader.ReadOptInt32();
    string description = reader.ReadAsciiString();
    var    mappingKey  = (Mapping)reader.ReadByte();
    var    commentKey  = (Comments)reader.ReadByte();

    var inheritanceArray = reader.ReadOptArray(reader.ReadAsciiString);

    // de-duplicate the inheritance strings, preserving "no array" as null
    HashSet<string> inheritanceSet = null;
    if (inheritanceArray != null)
    {
        inheritanceSet = new HashSet<string>(inheritanceArray);
    }

    return new Phenotype(mim, description, mappingKey, commentKey, inheritanceSet);
}
/// <summary>
/// Parses the transcript cache and returns its contents. Sections are read in this
/// order: genes, transcript regions, miRNAs, peptide sequences, regulatory regions
/// and transcripts.
/// </summary>
/// <param name="refIndexToChromosome">reference index to chromosome lookup passed to the gene, regulatory region and transcript readers</param>
/// <returns>a TranscriptCacheData combining the header with everything that was read</returns>
public TranscriptCacheData Read(IDictionary<ushort, IChromosome> refIndexToChromosome)
{
    // the lookup tables come first because the transcripts refer back to them
    var geneTable    = ReadItems(_reader, () => Gene.Read(_reader, refIndexToChromosome));
    var regionTable  = ReadItems(_reader, () => TranscriptRegion.Read(_reader));
    var mirnaTable   = ReadItems(_reader, () => Interval.Read(_reader));
    var peptideTable = ReadItems(_reader, () => _reader.ReadAsciiString());

    var regulatoryIntervals = ReadIntervals(_reader, () => RegulatoryRegion.Read(_reader, refIndexToChromosome));

    var transcriptIntervals = ReadIntervals(_reader,
        () => Transcript.Read(_reader, refIndexToChromosome, geneTable, regionTable, mirnaTable, peptideTable));

    return new TranscriptCacheData(Header, geneTable, regionTable, mirnaTable, peptideTable,
        transcriptIntervals, regulatoryIntervals);
}
/// <summary>
/// Reads a count-prefixed array of ASCII strings and then calls CheckGuard().
/// </summary>
/// <returns>the strings in the order they were read</returns>
private string[] ReadStringArray()
{
    int count  = _reader.ReadOptInt32();
    var result = new string[count];

    for (var index = 0; index < result.Length; index++)
    {
        result[index] = _reader.ReadAsciiString();
    }

    CheckGuard();
    return result;
}
/// <summary>
/// Loads supplementary intervals from the stream. Up to MaxStreamLength compressed
/// bytes are read, decompressed with Zstandard and parsed into one interval array
/// per chromosome index.
/// </summary>
/// <remarks>
/// Stream.Read may return fewer bytes than requested even when more data is
/// available, so the compressed payload is gathered in a loop until the stream is
/// exhausted or the buffer is full, instead of relying on a single Read call.
/// </remarks>
/// <param name="stream">stream containing the compressed interval data</param>
/// <exception cref="UserErrorException">thrown when the stored schema version differs from SaCommon.SchemaVersion</exception>
public NsiReader(Stream stream)
{
    _stream = stream;

    // gather the compressed payload; a single Read is not guaranteed to fill the buffer
    var compressData = new byte[MaxStreamLength];
    int length = 0;
    int bytesRead;
    while (length < MaxStreamLength &&
           (bytesRead = stream.Read(compressData, length, MaxStreamLength - length)) > 0)
    {
        length += bytesRead;
    }

    // uncompress
    var zstd               = new Zstandard();
    var decompressedLength = zstd.GetDecompressedLength(compressData, length);
    var decompressedData   = new byte[decompressedLength];
    zstd.Decompress(compressData, length, decompressedData, decompressedLength);

    using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        Version   = DataSourceVersion.Read(memReader);
        Assembly  = (GenomeAssembly)memReader.ReadByte();
        JsonKey   = memReader.ReadAsciiString();
        ReportFor = (ReportFor)memReader.ReadByte();

        int schemaVersion = memReader.ReadOptInt32();
        if (schemaVersion != SaCommon.SchemaVersion)
        {
            throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
        }

        int count = memReader.ReadOptInt32();

        // group the intervals by chromosome index
        var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();
        for (var i = 0; i < count; i++)
        {
            var saInterval = new SuppInterval(memReader);
            ushort index   = saInterval.Chromosome.Index;

            if (!suppIntervals.TryGetValue(index, out var chromIntervals))
            {
                chromIntervals       = new List<Interval<string>>();
                suppIntervals[index] = chromIntervals;
            }

            chromIntervals.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
        }

        // convert each per-chromosome list into an interval array
        _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
        foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
        {
            _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
        }
    }
}
/// <summary>
/// Deserializes a gene from the binary reader: reference index, start, end, strand,
/// symbol, HGNC ID, Entrez ID, Ensembl ID and MIM number.
/// </summary>
/// <param name="reader">reader positioned at the start of the gene</param>
/// <returns>a new Gene built from the fields that were read</returns>
public static Gene Read(ExtendedBinaryReader reader)
{
    var refIndex      = reader.ReadUInt16();
    var begin         = reader.ReadOptInt32();
    var finish        = reader.ReadOptInt32();
    var reverseStrand = reader.ReadBoolean();
    var geneSymbol    = reader.ReadAsciiString();
    var hgnc          = reader.ReadOptInt32();
    var entrez        = CompactId.Read(reader);
    var ensembl       = CompactId.Read(reader);
    var mim           = reader.ReadOptInt32();

    return new Gene(refIndex, begin, finish, reverseStrand, geneSymbol, hgnc, entrez, ensembl, mim);
}
/// <summary>
/// Loads a Jasix index from the stream: the format version, the per-chromosome
/// indices, the chromosome name synonyms and the named section ranges.
/// </summary>
/// <param name="stream">stream containing the serialized index; a reference to it is kept in _stream</param>
/// <exception cref="InvalidDataException">thrown when the stored version differs from JasixCommons.Version</exception>
public JasixIndex(Stream stream) : this()
{
    _stream = stream;

    // NOTE(review): disposing this reader presumably closes the underlying stream as
    // well, even though it is also kept in _stream - confirm this is intended
    using (var indexReader = new ExtendedBinaryReader(stream))
    {
        var observedVersion = indexReader.ReadOptInt32();
        if (observedVersion != JasixCommons.Version)
        {
            throw new InvalidDataException($"Invalid Jasix version: Observed {observedVersion}, expected{JasixCommons.Version}");
        }

        // per-chromosome indices, keyed by reference sequence
        var numChromosomes = indexReader.ReadOptInt32();
        for (var n = 0; n < numChromosomes; n++)
        {
            var chromosomeIndex = new JasixChrIndex(indexReader);
            _chrIndices[chromosomeIndex.ReferenceSequence] = chromosomeIndex;
        }

        // synonym -> indexed chromosome name
        var numSynonyms = indexReader.ReadOptInt32();
        for (var n = 0; n < numSynonyms; n++)
        {
            var alias      = indexReader.ReadAsciiString();
            var chromName  = indexReader.ReadAsciiString();
            _synonymToChrName[alias] = chromName;
        }

        // section name -> file range
        var numSections = indexReader.ReadOptInt32();
        for (var n = 0; n < numSections; n++)
        {
            var section    = indexReader.ReadAsciiString();
            var rangeBegin = indexReader.ReadOptInt64();
            var rangeEnd   = indexReader.ReadOptInt64();
            _sectionRanges[section] = new FileRange(rangeBegin, rangeEnd);
        }
    }
}
/// <summary>
/// Reads a count-prefixed array of cytogenetic bands; each band consists of a begin
/// position, an end position and a name.
/// </summary>
/// <param name="reader">reader positioned at the band count</param>
/// <returns>the bands in the order they were read</returns>
private static Band[] GetCytogeneticBands(ExtendedBinaryReader reader)
{
    var bandCount = reader.ReadOptInt32();
    var result    = new Band[bandCount];

    for (var index = 0; index < result.Length; index++)
    {
        var bandBegin = reader.ReadOptInt32();
        var bandEnd   = reader.ReadOptInt32();
        var bandName  = reader.ReadAsciiString();

        result[index] = new Band(bandBegin, bandEnd, bandName);
    }

    return result;
}