/// <summary>
/// Creates a reader from header values and interval arrays that the caller has already built.
/// </summary>
/// <param name="assembly">Value stored in the Assembly property.</param>
/// <param name="version">Value stored in the Version property.</param>
/// <param name="jsonKey">Value stored in the JsonKey property.</param>
/// <param name="reportFor">Value stored in the ReportFor property.</param>
/// <param name="intervalArrays">
/// Interval arrays keyed by a ushort (presumably the chromosome index — confirm against callers);
/// the dictionary is stored as-is, not copied.
/// </param>
private NsiReader(
    GenomeAssembly assembly,
    IDataSourceVersion version,
    string jsonKey,
    ReportFor reportFor,
    Dictionary<ushort, IntervalArray<string>> intervalArrays)
{
    // header information
    Assembly  = assembly;
    Version   = version;
    JsonKey   = jsonKey;
    ReportFor = reportFor;

    // interval payload
    _intervalArrays = intervalArrays;
}
/// <summary>
/// Creates a reader from header values and an array of interval arrays, which is wrapped
/// in a new <c>IntervalForest&lt;string&gt;</c>.
/// </summary>
/// <param name="assembly">Value stored in the Assembly property.</param>
/// <param name="version">Value stored in the Version property.</param>
/// <param name="jsonKey">Value stored in the JsonKey property.</param>
/// <param name="reportFor">Value stored in the ReportFor property.</param>
/// <param name="intervalArrays">Interval arrays handed to the IntervalForest constructor.</param>
private NsiReader(
    GenomeAssembly assembly,
    IDataSourceVersion version,
    string jsonKey,
    ReportFor reportFor,
    IntervalArray<string>[] intervalArrays)
{
    // header information
    Assembly  = assembly;
    Version   = version;
    JsonKey   = jsonKey;
    ReportFor = reportFor;

    // interval payload
    var forest = new IntervalForest<string>(intervalArrays);
    _intervalForest = forest;
}
/// <summary>
/// Opens the bgzip-compressed interval TSV and its companion ".tvi" index in
/// <paramref name="outputPath"/> and immediately writes the file header.
/// </summary>
/// <param name="outputPath">Directory in which both output files are created.</param>
/// <param name="dataSourceVersion">Data source description; its Version (spaces replaced by '_') becomes part of the file name.</param>
/// <param name="assembly">Assembly name passed on to the header.</param>
/// <param name="dataVersion">Data version passed on to the header.</param>
/// <param name="keyName">Key name; used as file-name prefix and passed on to the header.</param>
/// <param name="reportingFor">Reporting criterion passed on to the header.</param>
public IntervalTsvWriter(string outputPath, DataSourceVersion dataSourceVersion, string assembly, int dataVersion, string keyName, ReportFor reportingFor)
{
    // <keyName>_<version with underscores>.interval.tsv.gz
    string versionTag = dataSourceVersion.Version.Replace(" ", "_");
    string tsvPath    = Path.Combine(outputPath, $"{keyName}_{versionTag}.interval.tsv.gz");

    _bgzipTextWriter = new BgzipTextWriter(tsvPath);
    string header    = GetHeader(dataSourceVersion, dataVersion, assembly, keyName, reportingFor);
    _bgzipTextWriter.Write(header);

    // the index lives next to the data file
    _tsvIndex = new TsvIndex(tsvPath + ".tvi");
}
/// <summary>
/// Creates a supplementary interval by storing every argument in the property of the matching name.
/// </summary>
/// <param name="keyName">Stored in KeyName.</param>
/// <param name="refName">Stored in ReferenceName.</param>
/// <param name="start">Stored in Start.</param>
/// <param name="end">Stored in End.</param>
/// <param name="jsonString">Stored in JsonString.</param>
/// <param name="reportingFor">Stored in ReportingFor.</param>
public SupplementaryInterval(
    string keyName,
    string refName,
    int start,
    int end,
    string jsonString,
    ReportFor reportingFor)
{
    KeyName       = keyName;
    ReferenceName = refName;
    Start         = start;
    End           = end;
    JsonString    = jsonString;
    ReportingFor  = reportingFor;
}
/// <summary>
/// Creates a writer on top of <paramref name="stream"/>: the header is written first, then the
/// stream is wrapped in a Zstandard <c>BlockStream</c> (compress mode) that backs the binary writer.
/// </summary>
/// <param name="stream">Destination stream.</param>
/// <param name="version">Data source version passed to WriteHeader.</param>
/// <param name="assembly">Genome assembly passed to WriteHeader.</param>
/// <param name="jsonKey">JSON key passed to WriteHeader.</param>
/// <param name="reportFor">Reporting criterion passed to WriteHeader.</param>
/// <param name="schemaVersion">Schema version passed to WriteHeader.</param>
/// <param name="leaveOpen">Stored in _leaveOpen and forwarded to the ExtendedBinaryWriter constructor.</param>
public NsiWriter(Stream stream, DataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion, bool leaveOpen = false)
{
    _stream    = stream;
    _leaveOpen = leaveOpen;

    // WriteHeader runs before the stream is wrapped in the compressing block stream
    WriteHeader(version, assembly, jsonKey, reportFor, schemaVersion);

    var compressedStream = new BlockStream(new Zstandard(), stream, CompressionMode.Compress);
    _writer = new ExtendedBinaryWriter(compressedStream, Encoding.UTF8, leaveOpen);
}
/// <summary>
/// Creates a writer that keeps <paramref name="writer"/> as its output and serializes the header
/// fields into a freshly created in-memory buffer (<c>_memStream</c> / <c>_memWriter</c>).
/// </summary>
/// <param name="writer">Binary writer stored in _writer.</param>
/// <param name="version">Data source version; writes itself into the memory writer.</param>
/// <param name="assembly">Genome assembly, written as a single byte.</param>
/// <param name="jsonKey">JSON key, written with WriteOptAscii.</param>
/// <param name="reportFor">Reporting criterion, written as a single byte.</param>
/// <param name="schemaVersion">Schema version, written with WriteOpt.</param>
public NsiWriter(BinaryWriter writer, DataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion)
{
    _writer = writer;

    // in-memory staging buffer
    _memStream = new MemoryStream();
    _memWriter = new ExtendedBinaryWriter(_memStream);

    // header fields, in on-disk order
    version.Write(_memWriter);

    byte assemblyByte = (byte)assembly;
    _memWriter.Write(assemblyByte);

    _memWriter.WriteOptAscii(jsonKey);

    byte reportForByte = (byte)reportFor;
    _memWriter.Write(reportForByte);

    _memWriter.WriteOpt(schemaVersion);
}
/// <summary>
/// Parses one "#key=value" header line and stores the value in the matching field.
/// Lines without '=' and lines with an unrecognised key are ignored.
/// </summary>
/// <param name="line">A single header line.</param>
/// <exception cref="InvalidDataException">
/// The schema version in the header differs from <c>JsonCommon.SupplementarySchemaVersion</c>.
/// </exception>
private void ParseHeaderLine(string line)
{
    // Split on the first '=' only, so values that themselves contain '=' are kept intact.
    var words = line.Split(new[] { '=' }, 2);
    if (words.Length < 2)
    {
        return;
    }

    var key   = words[0];
    var value = words[1];

    switch (key)
    {
        case "#name":
            _name = value;
            break;
        case "#assembly":
            _genomeAssembly = value;
            break;
        case "#version":
            _version = value;
            break;
        case "#releaseDate":
            _releaseDate = value;
            break;
        case "#description":
            _description = value;
            break;
        case "#keyName":
            _keyName = value;
            break;
        case "#reportFor":
            _reportFor = GetReportFor(value);
            break;
        case "#schemaVersion":
        // The misspelled key is what this parser matched previously; it is still
        // accepted so that any header using that spelling keeps being validated.
        case "#schemaVerstion":
        {
            var schemaVersion = int.Parse(value);
            if (schemaVersion != JsonCommon.SupplementarySchemaVersion)
            {
                throw new InvalidDataException($"Expected Schema version:{JsonCommon.SupplementarySchemaVersion}, observed: {value}");
            }

            break;
        }
    }
}
/// <summary>
/// Reads a Zstandard-compressed block of at most <c>MaxStreamLength</c> bytes from
/// <paramref name="stream"/>, decompresses it, parses the header (version, assembly, JSON key,
/// report-for flag, schema version) and groups the contained intervals by chromosome index.
/// </summary>
/// <param name="stream">Stream positioned at the start of the compressed data.</param>
/// <exception cref="UserErrorException">
/// The stored schema version differs from <c>SaCommon.SchemaVersion</c>.
/// </exception>
public NsiReader(Stream stream)
{
    _stream = stream;

    // Stream.Read may return fewer bytes than requested even when more data follows,
    // so keep reading until the buffer is full or the stream is exhausted.
    var compressData = new byte[MaxStreamLength];
    int length = 0;
    while (length < MaxStreamLength)
    {
        int bytesRead = stream.Read(compressData, length, MaxStreamLength - length);
        if (bytesRead <= 0)
        {
            break;
        }

        length += bytesRead;
    }

    // uncompress
    var zstd               = new Zstandard();
    var decompressedLength = zstd.GetDecompressedLength(compressData, length);
    var decompressedData   = new byte[decompressedLength];
    zstd.Decompress(compressData, length, decompressedData, decompressedLength);

    using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
    using (var memReader = new ExtendedBinaryReader(memStream))
    {
        // header
        Version   = DataSourceVersion.Read(memReader);
        Assembly  = (GenomeAssembly)memReader.ReadByte();
        JsonKey   = memReader.ReadAsciiString();
        ReportFor = (ReportFor)memReader.ReadByte();

        int schemaVersion = memReader.ReadOptInt32();
        if (schemaVersion != SaCommon.SchemaVersion)
        {
            throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
        }

        // intervals, bucketed by chromosome index
        int count = memReader.ReadOptInt32();
        var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();

        for (var i = 0; i < count; i++)
        {
            var saInterval = new SuppInterval(memReader);
            var index      = saInterval.Chromosome.Index;

            if (!suppIntervals.TryGetValue(index, out var bucket))
            {
                bucket = new List<Interval<string>>();
                suppIntervals[index] = bucket;
            }

            bucket.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
        }

        _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
        foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
        {
            _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
        }
    }
}
/// <summary>
/// Parses a "#matchVariantsBy=..." header line and derives the matching/reporting settings.
/// </summary>
/// <param name="line">The header line; it is trimmed and only its first tab-separated column is used.</param>
/// <returns>
/// <list type="bullet">
/// <item>"allele": match by allele, single object, all variants.</item>
/// <item>"position": match by position, object array, all variants.</item>
/// <item>"sv": match by position, object array, structural variants only.</item>
/// </list>
/// </returns>
/// <exception cref="UserErrorException">The value is missing or is not one of the supported options.</exception>
public static (bool MatchByAllele, bool IsArray, SaJsonValueType PrimaryType, ReportFor reportFor) ParseMatchVariantsBy(string line)
{
    line = line.Trim();
    CheckPrefix(line, "#matchVariantsBy");

    string firstCol = line.OptimizedSplit('\t')[0];
    (_, string matchBy) = firstCol.OptimizedKeyValue();

    switch (matchBy)
    {
        case null:
            throw new UserErrorException("Please provide the annotation reporting criteria in the format: #matchVariantsBy=allele.");

        case "allele":
            return (true, false, SaJsonValueType.Object, ReportFor.AllVariants);

        case "position":
            return (false, true, SaJsonValueType.ObjectArray, ReportFor.AllVariants);

        case "sv":
            return (false, true, SaJsonValueType.ObjectArray, ReportFor.StructuralVariants);

        default:
            // The message lists every value the switch accepts, including 'sv'.
            throw new UserErrorException("matchVariantsBy tag has to be one of 'allele', 'position' or 'sv'");
    }
}
/// <summary>
/// Creates an interval annotation header. All descriptive fields are forwarded to the base
/// constructor; only the reporting criterion is kept in this class.
/// </summary>
/// <param name="name">Forwarded to the base constructor.</param>
/// <param name="assembly">Forwarded to the base constructor.</param>
/// <param name="version">Forwarded to the base constructor.</param>
/// <param name="releaseDate">Forwarded to the base constructor.</param>
/// <param name="description">Forwarded to the base constructor.</param>
/// <param name="reportingFor">Stored in _reportingFor.</param>
public IntervalAnnotationHeader(
    string name,
    string assembly,
    string version,
    string releaseDate,
    string description,
    ReportFor reportingFor)
    : base(name, assembly, version, releaseDate, description)
{
    _reportingFor = reportingFor;
}
/// <summary>
/// Returns a new list holding the intervals whose ReportingFor equals <paramref name="reportFor"/>,
/// in their original order.
/// </summary>
/// <param name="reportFor">Reporting criterion to select.</param>
/// <param name="intervals">Candidate intervals.</param>
/// <returns>A freshly allocated list of the matching intervals (possibly empty).</returns>
public static List<ISupplementaryInterval> GetSpecificIntervals(ReportFor reportFor, IEnumerable<ISupplementaryInterval> intervals)
{
    var matching = from interval in intervals
                   where interval.ReportingFor == reportFor
                   select interval;

    return matching.ToList();
}
/// <summary>
/// Creates an <c>NsiWriter</c> over <paramref name="nsiStream"/> using the current
/// <c>SaCommon.SchemaVersion</c>.
/// </summary>
/// <param name="nsiStream">Destination stream.</param>
/// <param name="version">Data source version.</param>
/// <param name="assembly">Genome assembly.</param>
/// <param name="jsonTag">JSON key handed to the writer.</param>
/// <param name="reportFor">Reporting criterion.</param>
/// <returns>The newly constructed writer.</returns>
public static NsiWriter GetNsiWriter(Stream nsiStream, DataSourceVersion version, GenomeAssembly assembly, string jsonTag, ReportFor reportFor)
{
    var nsiWriter = new NsiWriter(
        nsiStream,
        version,
        assembly,
        jsonTag,
        reportFor,
        SaCommon.SchemaVersion);

    return nsiWriter;
}
/// <summary>
/// Builds the text header of an interval TSV file: one "#key=value" line per metadata item,
/// followed by the column header line. Lines are terminated with '\n'.
/// </summary>
/// <param name="dataSourceVersion">Supplies name, version, description and release date (ticks).</param>
/// <param name="dataVersion">Value of the "#dataVersion" line.</param>
/// <param name="assembly">Value of the "#assembly" line.</param>
/// <param name="keyName">Value of the "#keyName" line.</param>
/// <param name="reportingFor">Value of the "#reportFor" line.</param>
/// <returns>The complete header text.</returns>
private static string GetHeader(DataSourceVersion dataSourceVersion, int dataVersion, string assembly, string keyName, ReportFor reportingFor)
{
    // The header is machine-readable, so the date is formatted with the invariant culture;
    // interpolation alone would use the current culture's calendar.
    var releaseDate = new DateTime(dataSourceVersion.ReleaseDateTicks, DateTimeKind.Utc);
    string releaseDateText = releaseDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

    var sb = StringBuilderCache.Acquire();

    sb.Append($"#name={dataSourceVersion.Name}\n");
    sb.Append($"#assembly={assembly}\n");
    sb.Append($"#version={dataSourceVersion.Version}\n");
    sb.Append($"#description={dataSourceVersion.Description}\n");
    sb.Append($"#releaseDate={releaseDateText}\n");
    sb.Append($"#dataVersion={dataVersion}\n");
    sb.Append($"#schemaVersion={JsonCommon.SupplementarySchemaVersion}\n");
    sb.Append($"#reportFor={reportingFor}\n");
    sb.Append($"#keyName={keyName}\n");
    sb.Append("#CHROM\tSTART\tEND\tJSON\n");

    return StringBuilderCache.GetStringAndRelease(sb);
}
/// <summary>
/// Writes the file header to <c>_stream</c>: identifier, data source version, assembly byte,
/// JSON key, report-for byte, schema version and the guard integer, in that order.
/// </summary>
/// <param name="version">Data source version; writes itself to the header writer.</param>
/// <param name="assembly">Genome assembly, written as a single byte.</param>
/// <param name="jsonKey">JSON key, written with WriteOptAscii.</param>
/// <param name="reportFor">Reporting criterion, written as a single byte.</param>
/// <param name="schemaVersion">Schema version, written as an int.</param>
private void WriteHeader(DataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion)
{
    // NOTE(review): the third constructor argument is presumably "leaveOpen", so that
    // disposing this temporary writer does not close _stream — confirm.
    using (var headerWriter = new ExtendedBinaryWriter(_stream, Encoding.UTF8, true))
    {
        headerWriter.WriteOptAscii(SaCommon.NsiIdentifier);
        version.Write(headerWriter);

        byte assemblyByte = (byte)assembly;
        headerWriter.Write(assemblyByte);

        headerWriter.WriteOptAscii(jsonKey);

        byte reportForByte = (byte)reportFor;
        headerWriter.Write(reportForByte);

        headerWriter.Write(schemaVersion);
        headerWriter.Write(SaCommon.GuardInt);
    }
}