Beispiel #1
0
 /// <summary>
 /// Builds a reader directly from header values and interval arrays that already exist in memory.
 /// </summary>
 /// <param name="assembly">Value stored in <c>Assembly</c>.</param>
 /// <param name="version">Value stored in <c>Version</c>.</param>
 /// <param name="jsonKey">Value stored in <c>JsonKey</c>.</param>
 /// <param name="reportFor">Value stored in <c>ReportFor</c>.</param>
 /// <param name="intervalArrays">Interval arrays keyed by a ushort (presumably the chromosome index - confirm against callers); kept by reference, not copied.</param>
 private NsiReader(GenomeAssembly assembly, IDataSourceVersion version, string jsonKey, ReportFor reportFor,
                   Dictionary<ushort, IntervalArray<string>> intervalArrays)
 {
     (Assembly, Version, JsonKey, ReportFor) = (assembly, version, jsonKey, reportFor);
     _intervalArrays                         = intervalArrays;
 }
Beispiel #2
0
 /// <summary>
 /// Builds a reader from header values and an array of interval arrays, which is wrapped in an
 /// <c>IntervalForest</c>.
 /// </summary>
 /// <param name="assembly">Value stored in <c>Assembly</c>.</param>
 /// <param name="version">Value stored in <c>Version</c>.</param>
 /// <param name="jsonKey">Value stored in <c>JsonKey</c>.</param>
 /// <param name="reportFor">Value stored in <c>ReportFor</c>.</param>
 /// <param name="intervalArrays">Interval arrays handed to the <c>IntervalForest</c> constructor.</param>
 private NsiReader(GenomeAssembly assembly, IDataSourceVersion version, string jsonKey, ReportFor reportFor,
                   IntervalArray<string>[] intervalArrays)
 {
     (Assembly, Version, JsonKey, ReportFor) = (assembly, version, jsonKey, reportFor);

     var forest      = new IntervalForest<string>(intervalArrays);
     _intervalForest = forest;
 }
Beispiel #3
0
        /// <summary>
        /// Opens the bgzip-compressed interval TSV file inside <paramref name="outputPath"/>, writes the header
        /// produced by <c>GetHeader</c>, and creates the accompanying <c>.tvi</c> index next to it.
        /// The file is named <c>{keyName}_{version with spaces replaced by '_'}.interval.tsv.gz</c>.
        /// </summary>
        /// <param name="outputPath">Directory that receives the TSV file and its index.</param>
        /// <param name="dataSourceVersion">Data source description; its version string becomes part of the file name.</param>
        /// <param name="assembly">Assembly name forwarded to the header.</param>
        /// <param name="dataVersion">Data version forwarded to the header.</param>
        /// <param name="keyName">Key name; used as the file name prefix and forwarded to the header.</param>
        /// <param name="reportingFor">Reporting scope forwarded to the header.</param>
        public IntervalTsvWriter(string outputPath, DataSourceVersion dataSourceVersion, string assembly, int dataVersion, string keyName,
                                 ReportFor reportingFor)
        {
            var versionTag = dataSourceVersion.Version.Replace(" ", "_");
            var tsvPath    = Path.Combine(outputPath, $"{keyName}_{versionTag}.interval.tsv.gz");

            _bgzipTextWriter = new BgzipTextWriter(tsvPath);

            var header = GetHeader(dataSourceVersion, dataVersion, assembly, keyName, reportingFor);
            _bgzipTextWriter.Write(header);

            // the index sits beside the data file: same path plus the ".tvi" suffix
            _tsvIndex = new TsvIndex(tsvPath + ".tvi");
        }
 /// <summary>
 /// Creates a supplementary interval by copying each argument into the corresponding member.
 /// </summary>
 /// <param name="keyName">Stored in <c>KeyName</c>.</param>
 /// <param name="refName">Stored in <c>ReferenceName</c>.</param>
 /// <param name="start">Stored in <c>Start</c>.</param>
 /// <param name="end">Stored in <c>End</c>.</param>
 /// <param name="jsonString">Stored in <c>JsonString</c>.</param>
 /// <param name="reportingFor">Stored in <c>ReportingFor</c>.</param>
 public SupplementaryInterval(
     string keyName,
     string refName,
     int start,
     int end,
     string jsonString,
     ReportFor reportingFor)
 {
     this.KeyName       = keyName;
     this.ReferenceName = refName;
     this.Start         = start;
     this.End           = end;
     this.JsonString    = jsonString;
     this.ReportingFor  = reportingFor;
 }
Beispiel #5
0
        /// <summary>
        /// Creates a writer over <paramref name="stream"/>: the header is written first via <c>WriteHeader</c>,
        /// then the stream is wrapped in a Zstandard-compressing <c>BlockStream</c> used for all further output.
        /// </summary>
        /// <param name="stream">Destination stream.</param>
        /// <param name="version">Data source version forwarded to the header.</param>
        /// <param name="assembly">Genome assembly forwarded to the header.</param>
        /// <param name="jsonKey">JSON key forwarded to the header.</param>
        /// <param name="reportFor">Reporting scope forwarded to the header.</param>
        /// <param name="schemaVersion">Schema version forwarded to the header.</param>
        /// <param name="leaveOpen">Stored in <c>_leaveOpen</c> and passed on to the underlying binary writer.</param>
        public NsiWriter(Stream stream, DataSourceVersion version,
                         GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion,
                         bool leaveOpen = false)
        {
            _leaveOpen = leaveOpen;
            _stream    = stream;

            // the header call happens before the compressing block stream is layered on top of the stream
            WriteHeader(version, assembly, jsonKey, reportFor, schemaVersion);

            _writer = new ExtendedBinaryWriter(
                new BlockStream(new Zstandard(), stream, CompressionMode.Compress),
                Encoding.UTF8,
                leaveOpen);
        }
Beispiel #6
0
        /// <summary>
        /// Creates a writer that keeps <paramref name="writer"/> as the output and stages the header fields
        /// (version, assembly, JSON key, report-for flag, schema version - in that order) in an in-memory stream.
        /// </summary>
        /// <param name="writer">Binary writer stored in <c>_writer</c>.</param>
        /// <param name="version">Data source version; serializes itself into the memory writer.</param>
        /// <param name="assembly">Genome assembly, written as a single byte.</param>
        /// <param name="jsonKey">JSON key, written with <c>WriteOptAscii</c>.</param>
        /// <param name="reportFor">Reporting scope, written as a single byte.</param>
        /// <param name="schemaVersion">Schema version, written with <c>WriteOpt</c>.</param>
        public NsiWriter(BinaryWriter writer, DataSourceVersion version,
                         GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion)
        {
            _memStream = new MemoryStream();
            _memWriter = new ExtendedBinaryWriter(_memStream);
            _writer    = writer;

            // both enums are narrowed to one byte each for serialization
            var assemblyCode  = (byte)assembly;
            var reportForCode = (byte)reportFor;

            version.Write(_memWriter);
            _memWriter.Write(assemblyCode);
            _memWriter.WriteOptAscii(jsonKey);
            _memWriter.Write(reportForCode);
            _memWriter.WriteOpt(schemaVersion);
        }
        /// <summary>
        /// Parses a single <c>#key=value</c> header line and stores the value in the matching field.
        /// Lines without an '=' and lines whose key is not recognized are silently ignored.
        /// </summary>
        /// <param name="line">One header line, e.g. <c>#name=MySource</c>.</param>
        /// <exception cref="InvalidDataException">
        /// The schema version in the header differs from <c>JsonCommon.SupplementarySchemaVersion</c>.
        /// </exception>
        /// <exception cref="FormatException">The schema version value is not an integer.</exception>
        private void ParseHeaderLine(string line)
        {
            // split on the first '=' only, so a value that itself contains '=' is kept intact
            var words = line.Split(new[] { '=' }, 2);

            if (words.Length < 2)
            {
                return;
            }

            var key   = words[0];
            var value = words[1];

            switch (key)
            {
            case "#name":
                _name = value;
                break;

            case "#assembly":
                _genomeAssembly = value;
                break;

            case "#version":
                _version = value;
                break;

            case "#releaseDate":
                _releaseDate = value;
                break;

            case "#description":
                _description = value;
                break;

            case "#keyName":
                _keyName = value;
                break;

            case "#reportFor":
                _reportFor = GetReportFor(value);
                break;

            // "#schemaVersion" is the correctly spelled key; the misspelled variant is the key this
            // parser used to look for and is still accepted in case existing files carry it
            case "#schemaVersion":
            case "#schemaVerstion":
                var schemaVersion = int.Parse(value);
                if (schemaVersion != JsonCommon.SupplementarySchemaVersion)
                {
                    throw new InvalidDataException($"Expected Schema version:{JsonCommon.SupplementarySchemaVersion}, observed: {value}");
                }
                break;
            }
        }
Beispiel #8
0
        /// <summary>
        /// Reads a Zstandard-compressed NSI payload from <paramref name="stream"/>, decompresses it, parses the
        /// header (version, assembly, JSON key, report-for flag, schema version) and loads every interval
        /// into per-chromosome interval arrays.
        /// </summary>
        /// <param name="stream">Source stream; at most <c>MaxStreamLength</c> bytes are consumed.</param>
        /// <exception cref="UserErrorException">
        /// The schema version stored in the payload differs from <c>SaCommon.SchemaVersion</c>.
        /// </exception>
        public NsiReader(Stream stream)
        {
            _stream = stream;

            // Stream.Read may return fewer bytes than requested even before the end of the stream,
            // so keep reading until the buffer is full or the stream reports end-of-data (0 bytes)
            var compressData = new byte[MaxStreamLength];
            var length       = 0;
            while (length < MaxStreamLength)
            {
                int numBytesRead = stream.Read(compressData, length, MaxStreamLength - length);
                if (numBytesRead == 0)
                {
                    break;
                }

                length += numBytesRead;
            }

            //uncompress
            var zstd = new Zstandard();
            var decompressedLength = zstd.GetDecompressedLength(compressData, length);
            var decompressedData   = new byte[decompressedLength];

            zstd.Decompress(compressData, length, decompressedData, decompressedLength);

            using (var memStream = new MemoryStream(decompressedData, 0, decompressedLength))
                using (var memReader = new ExtendedBinaryReader(memStream))
                {
                    Version   = DataSourceVersion.Read(memReader);
                    Assembly  = (GenomeAssembly)memReader.ReadByte();
                    JsonKey   = memReader.ReadAsciiString();
                    ReportFor = (ReportFor)memReader.ReadByte();
                    int schemaVersion = memReader.ReadOptInt32();

                    if (schemaVersion != SaCommon.SchemaVersion)
                    {
                        throw new UserErrorException($"Schema version mismatch!! Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {JsonKey}");
                    }

                    // group the intervals by chromosome index in the order they appear in the payload
                    int count         = memReader.ReadOptInt32();
                    var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();
                    for (var i = 0; i < count; i++)
                    {
                        var saInterval = new SuppInterval(memReader);
                        var chromIndex = saInterval.Chromosome.Index;

                        if (!suppIntervals.TryGetValue(chromIndex, out var chromIntervals))
                        {
                            chromIntervals            = new List<Interval<string>>();
                            suppIntervals[chromIndex] = chromIntervals;
                        }

                        chromIntervals.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));
                    }

                    _intervalArrays = new Dictionary<ushort, IntervalArray<string>>(suppIntervals.Count);
                    foreach ((ushort chromIndex, List<Interval<string>> intervals) in suppIntervals)
                    {
                        _intervalArrays[chromIndex] = new IntervalArray<string>(intervals.ToArray());
                    }
                }
        }
Beispiel #9
0
        /// <summary>
        /// Parses a <c>#matchVariantsBy=...</c> header line and derives the matching and reporting settings.
        /// Only the first tab-separated column of the trimmed line is examined.
        /// </summary>
        /// <param name="line">Header line that must carry the <c>#matchVariantsBy</c> prefix.</param>
        /// <returns>
        /// <c>allele</c>: match by allele, single object, all variants.<br/>
        /// <c>position</c>: match by position, object array, all variants.<br/>
        /// <c>sv</c>: match by position, object array, structural variants only.
        /// </returns>
        /// <exception cref="UserErrorException">The value is missing or is not one of the supported keywords.</exception>
        public static (bool MatchByAllele, bool IsArray, SaJsonValueType PrimaryType, ReportFor reportFor) ParseMatchVariantsBy(string line)
        {
            line = line.Trim();
            CheckPrefix(line, "#matchVariantsBy");
            string firstCol = line.OptimizedSplit('\t')[0];

            (_, string matchBy) = firstCol.OptimizedKeyValue();

            bool            matchByAllele;
            bool            isArray;
            SaJsonValueType primaryType;
            ReportFor       reportFor = ReportFor.AllVariants;

            switch (matchBy)
            {
            case null:
                throw new UserErrorException("Please provide the annotation reporting criteria in the format: #matchVariantsBy=allele.");

            case "allele":
                matchByAllele = true;
                isArray       = false;
                primaryType   = SaJsonValueType.Object;
                break;

            case "position":
                primaryType   = SaJsonValueType.ObjectArray;
                matchByAllele = false;
                isArray       = true;
                break;

            case "sv":
                primaryType   = SaJsonValueType.ObjectArray;
                matchByAllele = false;
                isArray       = true;
                reportFor     = ReportFor.StructuralVariants;
                break;

            default:
                // the message lists every keyword the switch above accepts, including 'sv'
                throw new UserErrorException("matchVariantsBy tag has to be one of 'allele', 'position' or 'sv'");
            }

            return(matchByAllele, isArray, primaryType, reportFor);
        }
 /// <summary>
 /// Creates an interval annotation header: the descriptive fields are forwarded to the base constructor,
 /// the reporting scope is kept in this class.
 /// </summary>
 /// <param name="name">Forwarded to the base constructor.</param>
 /// <param name="assembly">Forwarded to the base constructor.</param>
 /// <param name="version">Forwarded to the base constructor.</param>
 /// <param name="releaseDate">Forwarded to the base constructor.</param>
 /// <param name="description">Forwarded to the base constructor.</param>
 /// <param name="reportingFor">Stored in <c>_reportingFor</c>.</param>
 public IntervalAnnotationHeader(
     string name,
     string assembly,
     string version,
     string releaseDate,
     string description,
     ReportFor reportingFor)
     : base(name, assembly, version, releaseDate, description)
 {
     this._reportingFor = reportingFor;
 }
Beispiel #11
0
 /// <summary>
 /// Returns, in their original order, the intervals whose <c>ReportingFor</c> equals <paramref name="reportFor"/>.
 /// </summary>
 /// <param name="reportFor">Reporting scope to select.</param>
 /// <param name="intervals">Candidate intervals; enumerated once.</param>
 /// <returns>A new list containing only the matching intervals (empty when none match).</returns>
 public static List<ISupplementaryInterval> GetSpecificIntervals(ReportFor reportFor, IEnumerable<ISupplementaryInterval> intervals)
 {
     var matching = from candidate in intervals
                    where candidate.ReportingFor == reportFor
                    select candidate;

     return matching.ToList();
 }
Beispiel #12
0
 /// <summary>
 /// Creates an <c>NsiWriter</c> over <paramref name="nsiStream"/>, always using <c>SaCommon.SchemaVersion</c>
 /// as the schema version.
 /// </summary>
 /// <param name="nsiStream">Destination stream for the writer.</param>
 /// <param name="version">Data source version passed to the writer.</param>
 /// <param name="assembly">Genome assembly passed to the writer.</param>
 /// <param name="jsonTag">JSON key passed to the writer.</param>
 /// <param name="reportFor">Reporting scope passed to the writer.</param>
 /// <returns>The newly constructed writer.</returns>
 public static NsiWriter GetNsiWriter(Stream nsiStream, DataSourceVersion version, GenomeAssembly assembly, string jsonTag, ReportFor reportFor)
 {
     var nsiWriter = new NsiWriter(nsiStream, version, assembly, jsonTag, reportFor, SaCommon.SchemaVersion);
     return nsiWriter;
 }
Beispiel #13
0
        /// <summary>
        /// Builds the text header of an interval TSV file: one <c>#key=value</c> line per metadata field,
        /// followed by the column header line. Every line ends with '\n'.
        /// </summary>
        /// <param name="dataSourceVersion">Supplies name, version, description and release date (ticks, interpreted as UTC).</param>
        /// <param name="dataVersion">Written as <c>#dataVersion</c>.</param>
        /// <param name="assembly">Written as <c>#assembly</c>.</param>
        /// <param name="keyName">Written as <c>#keyName</c>.</param>
        /// <param name="reportingFor">Written as <c>#reportFor</c>.</param>
        /// <returns>The complete header text.</returns>
        private static string GetHeader(DataSourceVersion dataSourceVersion, int dataVersion, string assembly, string keyName, ReportFor reportingFor)
        {
            var sb = StringBuilderCache.Acquire();

            sb.Append($"#name={dataSourceVersion.Name}\n");
            sb.Append($"#assembly={assembly}\n");
            sb.Append($"#version={dataSourceVersion.Version}\n");
            sb.Append($"#description={dataSourceVersion.Description}\n");

            // The header is machine-readable, so the date is formatted with the invariant culture:
            // with the current culture, a non-Gregorian default calendar would change the printed year.
            var releaseDate     = new DateTime(dataSourceVersion.ReleaseDateTicks, DateTimeKind.Utc);
            var releaseDateText = releaseDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

            sb.Append($"#releaseDate={releaseDateText}\n");
            sb.Append($"#dataVersion={dataVersion}\n");
            sb.Append($"#schemaVersion={JsonCommon.SupplementarySchemaVersion}\n");
            sb.Append($"#reportFor={reportingFor}\n");
            sb.Append($"#keyName={keyName}\n");
            sb.Append("#CHROM\tSTART\tEND\tJSON\n");
            return(StringBuilderCache.GetStringAndRelease(sb));
        }
Beispiel #14
0
 /// <summary>
 /// Writes the file header to <c>_stream</c> in this order: NSI identifier, data source version,
 /// assembly (one byte), JSON key, report-for flag (one byte), schema version, guard integer.
 /// </summary>
 /// <param name="version">Data source version; serializes itself into the writer.</param>
 /// <param name="assembly">Genome assembly, narrowed to a byte.</param>
 /// <param name="jsonKey">JSON key, written with <c>WriteOptAscii</c>.</param>
 /// <param name="reportFor">Reporting scope, narrowed to a byte.</param>
 /// <param name="schemaVersion">Schema version, written with the plain <c>Write</c> overload.</param>
 private void WriteHeader(DataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion)
 {
     var assemblyCode  = (byte)assembly;
     var reportForCode = (byte)reportFor;

     // NOTE(review): the third argument is presumably "leaveOpen", so that disposing this
     // short-lived writer does not close _stream - confirm against ExtendedBinaryWriter
     using (var headerWriter = new ExtendedBinaryWriter(_stream, Encoding.UTF8, true))
     {
         headerWriter.WriteOptAscii(SaCommon.NsiIdentifier);
         version.Write(headerWriter);
         headerWriter.Write(assemblyCode);
         headerWriter.WriteOptAscii(jsonKey);
         headerWriter.Write(reportForCode);
         headerWriter.Write(schemaVersion);
         headerWriter.Write(SaCommon.GuardInt);
     }
 }