public void DifferentTypeException()
        {
            // An interval tagged "WrongType" is handed to a writer created for _intervalType;
            // the writer is expected to reject it with a descriptive exception.
            var mismatchedInterval = new VariantAnnotation.DataStructures.CustomInterval(
                "chr1", 100, 200, "WrongType", null, null);
            var outputPath = GetRandomPath();
            var version    = new DataSourceVersion("customInterval", "00", DateTime.Now.Ticks);

            using (var writer = new CustomIntervalWriter(outputPath, "chr1", _intervalType, version))
            {
                // ReSharper disable once AccessToDisposedClosure
                var thrown = Assert.Throws <Exception>(() => writer.WriteInterval(mismatchedInterval));

                var expectedMessage = $"Unexpected interval in custom interval writer.\nExpected interval type: {_intervalType}, observed interval type: WrongType";
                Assert.Equal(expectedMessage, thrown.Message);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the given genome file together with the exome reader obtained from
        /// <paramref name="exomeFiles"/> and writes the combined items to an NSA file
        /// (plus its index) under <c>_tempDirectory</c>.
        /// </summary>
        /// <param name="exomeFiles">Candidate exome files passed on to <c>GetExomeReader</c>.</param>
        /// <param name="genomeFile">Genome file to process; its name (without extension) names the output.</param>
        /// <param name="version">Data source version handed to the NSA writer.</param>
        private static void CreateNsa(string[] exomeFiles, string genomeFile, DataSourceVersion version)
        {
            Console.WriteLine($"Processing file: {genomeFile}");
            var baseName = Path.GetFileNameWithoutExtension(genomeFile);

            // The disposables are stacked at a single indentation level; they are released in reverse order.
            using (var exomeReader = GetExomeReader(exomeFiles, genomeFile))
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_tempDirectory, baseName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_tempDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.GnomadTag, true, false, SaCommon.SchemaVersion, false))
            using (var genomeReader = GZipUtilities.GetAppropriateStreamReader(genomeFile))
            {
                var combinedItems = new GnomadSnvReader(genomeReader, exomeReader, referenceProvider).GetCombinedItems();
                var numItems      = nsaWriter.Write(combinedItems);
                Console.WriteLine($"Wrote {numItems} items to NSA file.");
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the list of data source versions described by a cache header.
        /// </summary>
        /// <param name="header">The cache header; may be null.</param>
        /// <returns>
        /// An empty list when <paramref name="header"/> is null, otherwise a list holding a single
        /// "VEP" entry built from the header's custom data, creation time and transcript source.
        /// </returns>
        private static IEnumerable <IDataSourceVersion> GetDataSourceVersions(CacheHeader header)
        {
            var versions = new List<IDataSourceVersion>();

            if (header != null)
            {
                // NOTE(review): when the custom header is not a TranscriptCacheCustomHeader the
                // VEP version below is null before ToString() is applied - confirm that is intended.
                var vepVersion = (header.CustomHeader as TranscriptCacheCustomHeader)?.VepVersion;

                versions.Add(new DataSourceVersion(
                                 "VEP",
                                 vepVersion.ToString(),
                                 header.CreationTimeTicks,
                                 header.TranscriptSource.ToString()));
            }

            return versions;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Round-trip test: writes the ClinVar items through an <c>NsaWriter</c> into in-memory streams,
        /// then reads them back with an <c>NsaReader</c> and checks assembly, version and annotations.
        /// </summary>
        public void Write_clinvar_basic()
        {
            var version = new DataSourceVersion("source1", "v1", DateTime.Now.Ticks, "description");

            using (var saStream = new MemoryStream())
            using (var indexStream = new MemoryStream())
            {
                // write phase: the binary writers are disposed before the streams are rewound
                using (var saBinaryWriter = new ExtendedBinaryWriter(saStream, Encoding.UTF8, true))
                using (var indexBinaryWriter = new ExtendedBinaryWriter(indexStream, Encoding.UTF8, true))
                {
                    new NsaWriter(saBinaryWriter, indexBinaryWriter, version, GetSequenceProvider(), "clinvar", false, true, SaCommon.SchemaVersion, false, true, false, 1024)
                        .Write(GetClinvarItems());
                }

                // rewind both streams so the reader starts at the beginning
                saStream.Position    = 0;
                indexStream.Position = 0;

                // read phase
                using (var saReader = new NsaReader(saStream, indexStream, 1024))
                {
                    Assert.Equal(GenomeAssembly.GRCh37, saReader.Assembly);
                    Assert.Equal(version.ToString(), saReader.Version.ToString());

                    var chrom1Positions = new List <int> { 100, 101, 106 };
                    saReader.PreLoad(_chrom1, chrom1Positions);

                    // position 100
                    var atPosition100 = saReader.GetAnnotation(100).ToList();
                    Assert.Equal("T", atPosition100[0].refAllele);
                    Assert.Equal("A", atPosition100[0].altAllele);
                    Assert.Equal("\"id\":\"RCV0001\",\"reviewStatus\":\"no assertion provided\",\"alleleOrigins\":[\"origin1\"],\"refAllele\":\"T\",\"altAllele\":\"A\",\"phenotypes\":[\"phenotype1\"],\"medGenIds\":[\"medgen1\"],\"omimIds\":[\"omim1\"],\"orphanetIds\":[\"orpha1\"],\"significance\":[\"significance\"],\"lastUpdatedDate\":\"0001-01-01\",\"pubMedIds\":[\"10024875684920\"]", atPosition100[0].annotation);

                    // position 101
                    var atPosition101 = saReader.GetAnnotation(101).ToList();
                    Assert.Equal("A", atPosition101[0].refAllele);
                    Assert.Equal("", atPosition101[0].altAllele);
                    Assert.Equal("\"id\":\"RCV00011\",\"variationId\":101,\"reviewStatus\":\"no assertion provided\",\"alleleOrigins\":[\"origin1\"],\"refAllele\":\"A\",\"altAllele\":\"-\",\"phenotypes\":[\"phenotype1\"],\"medGenIds\":[\"medgen1\"],\"omimIds\":[\"omim1\"],\"orphanetIds\":[\"orpha1\"],\"significance\":[\"significance\"],\"lastUpdatedDate\":\"0001-01-01\",\"pubMedIds\":[\"10024875684920\"]", atPosition101[0].annotation);

                    // second chromosome, position 200
                    var chrom2Positions = new List <int> { 200, 205 };
                    saReader.PreLoad(_chrom2, chrom2Positions);

                    var (firstRef, firstAlt, firstJson) = saReader.GetAnnotation(200).First();
                    Assert.Equal("G", firstRef);
                    Assert.Equal("A", firstAlt);
                    Assert.NotNull(firstJson);
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Composes the '#'-prefixed header block (one "key=value" line per field, each terminated by
        /// '\n') followed by the "#CHROM START END JSON" column line.
        /// </summary>
        /// <param name="dataSourceVersion">Supplies name, version, description and release date ticks.</param>
        /// <param name="dataVersion">Value written on the "#dataVersion" line.</param>
        /// <param name="assembly">Value written on the "#assembly" line.</param>
        /// <param name="keyName">Value written on the "#keyName" line.</param>
        /// <param name="reportingFor">Value written on the "#reportFor" line.</param>
        /// <returns>The complete header text.</returns>
        private static string GetHeader(DataSourceVersion dataSourceVersion, int dataVersion, string assembly, string keyName, ReportFor reportingFor)
        {
            var builder = StringBuilderCache.Acquire();

            builder.Append($"#name={dataSourceVersion.Name}\n")
                   .Append($"#assembly={assembly}\n")
                   .Append($"#version={dataSourceVersion.Version}\n")
                   .Append($"#description={dataSourceVersion.Description}\n");

            // the release date is stored as ticks and is rendered as a date only
            var releaseDate = new DateTime(dataSourceVersion.ReleaseDateTicks, DateTimeKind.Utc);

            builder.Append($"#releaseDate={releaseDate:yyyy-MM-dd}\n")
                   .Append($"#dataVersion={dataVersion}\n")
                   .Append($"#schemaVersion={JsonCommon.SupplementarySchemaVersion}\n")
                   .Append($"#reportFor={reportingFor}\n")
                   .Append($"#keyName={keyName}\n")
                   .Append("#CHROM\tSTART\tEND\tJSON\n");

            return StringBuilderCache.GetStringAndRelease(builder);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Composes the '#'-prefixed header block (one "key=value" line per field, each terminated by
        /// '\n') followed by the "#GENESYMBOL JSON" column line.
        /// </summary>
        /// <param name="dataSourceVersion">Supplies name, version, description and release date ticks.</param>
        /// <param name="dataVersion">Value written on the "#dataVersion" line.</param>
        /// <param name="assembly">Value written on the "#assembly" line.</param>
        /// <param name="keyName">Value written on the "#keyName" line.</param>
        /// <param name="isArray">Value written on the "#isArray" line.</param>
        /// <returns>The complete header text.</returns>
        private static string GetHeader(DataSourceVersion dataSourceVersion, int dataVersion, string assembly, string keyName, bool isArray)
        {
            var builder = StringBuilderCache.Acquire();

            builder.Append($"#name={dataSourceVersion.Name}\n")
                   .Append($"#assembly={assembly}\n")
                   .Append($"#version={dataSourceVersion.Version}\n")
                   .Append($"#description={dataSourceVersion.Description}\n");

            // the release date is stored as ticks and is rendered as a date only
            var releaseDate = new DateTime(dataSourceVersion.ReleaseDateTicks, DateTimeKind.Utc);

            builder.Append($"#releaseDate={releaseDate:yyyy-MM-dd}\n")
                   .Append($"#dataVersion={dataVersion}\n")
                   .Append($"#schemaVersion={SaTsvCommon.SupplementarySchemaVersion}\n")
                   .Append($"#isArray={isArray}\n")
                   .Append($"#keyName={keyName}\n")
                   .Append("#GENESYMBOL\tJSON\n");

            return StringBuilderCache.GetStringAndRelease(builder);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Round-trip test: writes the gene annotations with an <c>NgaWriter</c> into memory, reads them
        /// back through an <c>NgaReader</c> and checks the JSON returned for each gene.
        /// </summary>
        public void ReadBackGeneAnnotations()
        {
            var version = new DataSourceVersion("source1", "v1", DateTime.Now.Ticks);

            // The using blocks guarantee that the writer and both streams are released even when an
            // assertion throws; previously the writer was only disposed on the success path.
            using (var writeStream = new MemoryStream())
            using (var ngaWriter = new NgaWriter(writeStream, version, "mimo", SaCommon.SchemaVersion, true))
            {
                ngaWriter.Write(GetGeneAnnotations());

                // the reader works on a copy of the bytes written so far; as before, the writer is
                // only disposed after reading has finished
                using (var readStream = new MemoryStream(writeStream.ToArray()))
                using (var ngaReader = new NgaReader(readStream))
                {
                    Assert.Null(ngaReader.GetAnnotation("gene3"));
                    Assert.Equal("[{\"mimNumber\":123,\"geneName\":\"gene name 1 ('minibrain', Drosophila, homolog of)\",\"description\":\"describing gene 1\\n\\\"some citation\\\"\",\"phenotypes\":[{\"phenotype\":\"disease 1\",\"mapping\":\"mapping of the wildtype gene\",\"inheritances\":[\"autosomal recessive\"],\"comments\":\"unconfirmed or possibly spurious mapping\"}]}]", ngaReader.GetAnnotation("gene1"));
                    Assert.Equal("[{\"mimNumber\":124,\"geneName\":\"gene name 2\",\"phenotypes\":[{\"phenotype\":\"disease 2\",\"mapping\":\"chromosome deletion or duplication syndrome\",\"inheritances\":[\"whatever\",\"never-ever\"],\"comments\":\"nondiseases\"}]}]", ngaReader.GetAnnotation("gene2"));
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds the list of data source versions described by a cache header.
        /// </summary>
        /// <param name="header">The cache header; may be null.</param>
        /// <returns>
        /// An empty list when <paramref name="header"/> is null, otherwise a list with exactly one entry:
        /// a fixed RefSeq NC_045512.2 entry when the VEP version is 0, or a "VEP" entry otherwise.
        /// </returns>
        private static IEnumerable <IDataSourceVersion> GetDataSourceVersions(CacheHeader header)
        {
            var versions = new List<IDataSourceVersion>();

            if (header != null)
            {
                ushort vepVersion = header.Custom.VepVersion;
                DataSourceVersion entry;

                // TODO: Embed the data source version in the next cache file format. This hack lets us handle the SARS-CoV-2 genome
                if (vepVersion == 0)
                {
                    long releaseTicks = new DateTime(2020, 3, 20, 0, 0, 0, DateTimeKind.Utc).Ticks;
                    entry = new DataSourceVersion("RefSeq", "NC_045512.2", releaseTicks, "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV2)");
                }
                else
                {
                    entry = new DataSourceVersion("VEP", vepVersion.ToString(), header.CreationTimeTicks, header.Source.ToString());
                }

                versions.Add(entry);
            }

            return versions;
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Creates an <c>NsaWriter</c> configured from the parser's settings and reports the data source
 /// version that was built for it.
 /// </summary>
 /// <param name="nsaStream">Stream receiving the NSA data.</param>
 /// <param name="indexStream">Stream receiving the index.</param>
 /// <param name="parser">Supplies JSON tag, description, version and the matchByAllele/isArray flags.</param>
 /// <param name="dataVersion">Fallback version, used only when the parser's version is null or empty.</param>
 /// <param name="referenceProvider">Reference sequence provider handed to the writer.</param>
 /// <param name="version">Receives the data source version created for the writer.</param>
 /// <returns>The configured writer.</returns>
 public static NsaWriter GetNsaWriter(Stream nsaStream, Stream indexStream, VariantAnnotationsParser parser, string dataVersion, ISequenceProvider referenceProvider, out DataSourceVersion version)
 {
     // a version declared by the parser takes precedence over the caller-supplied one
     if (!string.IsNullOrEmpty(parser.Version))
     {
         dataVersion = parser.Version;
     }

     version = new DataSourceVersion(parser.JsonTag, dataVersion, DateTime.Now.Ticks, parser.DataSourceDescription);

     var nsaWriter = new NsaWriter(
         nsaStream,
         indexStream,
         version,
         referenceProvider,
         parser.JsonTag,
         parser.MatchByAllele,   // match by allele
         parser.IsArray,         // is array
         SaCommon.SchemaVersion,
         false,                  // is positional
         false,                  // skip incorrect ref base
         true);                  // throw error on conflicting entries

     return nsaWriter;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Feeds the random scores to a <c>PhylopWriter</c> created with an interval length of 100 and
        /// checks how many chromosome intervals were recorded.
        /// </summary>
        public void AddScoreTest()
        {
            var randShorts = GetRandShorts();
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // the using block closes the underlying file even when the assertion below fails
                using (var writer = new ExtendedBinaryWriter(FileUtilities.GetCreateStream(randomPath)))
                {
                    var version = new DataSourceVersion("phylop", "0", DateTime.Now.Ticks, "unit test");

                    var phylopWriter = new PhylopWriter("chr1", version, GenomeAssembly.Unknown, 100, writer);

                    foreach (short s in randShorts)
                    {
                        phylopWriter.AddScore(s); // artificially forcing the writer to flush at every 100 scores
                    }

                    // we should have 41 intervals but note that the last interval is not dumped by AddScore. Therefore, we have 40
                    Assert.Equal(40, phylopWriter.ChromosomeIntervals.Count);
                }
            }
            finally
            {
                // always remove the temp file; File.Delete does not throw when the file is missing
                File.Delete(randomPath);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Round-trip test: writes the items produced by a <c>RefMinorReader</c> through a
        /// <c>RefMinorDbWriter</c> into memory and queries them back with a <c>RefMinorDbReader</c>.
        /// </summary>
        public void LoopBack()
        {
            var version = new DataSourceVersion("onekgen", "v0.3", DateTime.Now.Ticks);

            using (var minorReader = new RefMinorReader(new StreamReader(GetStream()), GetSequenceProvider()))
            using (var dbStream = new MemoryStream())
            using (var dbIndexStream = new MemoryStream())
            using (var dbWriter = new RefMinorDbWriter(new ExtendedBinaryWriter(dbStream), new ExtendedBinaryWriter(dbIndexStream), version, GetSequenceProvider(), SaCommon.SchemaVersion))
            {
                dbWriter.Write(minorReader.GetItems());

                // rewind both streams so the reader starts at the beginning
                dbStream.Position      = 0;
                dbIndexStream.Position = 0;

                using (var dbReader = new RefMinorDbReader(dbStream, dbIndexStream))
                {
                    var alleleAt15274 = dbReader.GetGlobalMajorAllele(ChromosomeUtilities.Chr1, 15274);
                    Assert.Equal("T", alleleAt15274);

                    var alleleAt1524 = dbReader.GetGlobalMajorAllele(ChromosomeUtilities.Chr1, 1524);
                    Assert.Null(alleleAt1524);
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Composes the '#'-prefixed header block (one "key=value" line per field, each terminated by
        /// '\n') followed by the "#CHROM POS REF ALT VCF JSON" column line.
        /// </summary>
        /// <param name="dataSourceVersion">Supplies name, version, description and release date ticks.</param>
        /// <param name="schemaVersion">Value written on the "#dataVersion" line.</param>
        /// <param name="assembly">Value for the "#assembly" line; the line is omitted when null or empty.</param>
        /// <param name="jsonKey">Value written on the "#jsonKey" line.</param>
        /// <param name="vcfKeys">Value written on the "#vcfKeys" line.</param>
        /// <param name="matchByAllele">Value written on the "#matchByAllele" line.</param>
        /// <param name="isArray">Value written on the "#isArray" line.</param>
        /// <returns>The complete header text.</returns>
        private static string GetHeader(DataSourceVersion dataSourceVersion, int schemaVersion, string assembly, string jsonKey, string vcfKeys, bool matchByAllele, bool isArray)
        {
            var builder     = StringBuilderCache.Acquire();
            var hasAssembly = !string.IsNullOrEmpty(assembly);

            builder.Append($"#name={dataSourceVersion.Name}\n");
            if (hasAssembly)
            {
                builder.Append($"#assembly={assembly}\n");
            }

            builder.Append($"#version={dataSourceVersion.Version}\n")
                   .Append($"#description={dataSourceVersion.Description}\n");

            // the release date is stored as ticks and is rendered as a date only
            var releaseDate = new DateTime(dataSourceVersion.ReleaseDateTicks, DateTimeKind.Utc);

            builder.Append($"#releaseDate={releaseDate:yyyy-MM-dd}\n")
                   .Append($"#dataVersion={schemaVersion}\n")
                   .Append($"#schemaVersion={SaTsvCommon.SupplementarySchemaVersion}\n")
                   .Append($"#matchByAllele={matchByAllele}\n")
                   .Append($"#isArray={isArray}\n")
                   .Append($"#jsonKey={jsonKey}\n")
                   .Append($"#vcfKeys={vcfKeys}\n")
                   .Append("#CHROM\tPOS\tREF\tALT\tVCF\tJSON\n");

            return StringBuilderCache.GetStringAndRelease(builder);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Round-trip test: writes the gene annotations with an <c>NgaWriter</c> into a memory stream,
        /// loads them via <c>NgaReader.Read</c> and checks the JSON returned for each gene.
        /// </summary>
        public void ReadBackGeneAnnotations()
        {
            var version = new DataSourceVersion("source1", "v1", DateTime.Now.Ticks);

            NgaReader ngaReader;
            using (var buffer = new MemoryStream())
            {
                // the writer is disposed before the stream is rewound and read
                using (var ngaWriter = new NgaWriter(buffer, version, "mimo", SaCommon.SchemaVersion, true, true))
                {
                    ngaWriter.Write(GetGeneAnnotations());
                }

                buffer.Position = 0;
                ngaReader       = NgaReader.Read(buffer);
            }

            Assert.NotNull(ngaReader);

            var gene3Json = ngaReader.GetAnnotation("gene3");
            Assert.Null(gene3Json);

            var gene1Json = ngaReader.GetAnnotation("gene1");
            Assert.Equal("[{\"mimNumber\":123,\"geneName\":\"gene name 1 ('minibrain', Drosophila, homolog of)\",\"description\":\"describing gene 1\\n\\\"some citation\\\"\",\"phenotypes\":[{\"phenotype\":\"disease 1\",\"description\":\"This is disease 1\",\"mapping\":\"mapping of the wildtype gene\",\"inheritances\":[\"autosomal recessive\"],\"comments\":[\"unconfirmed or possibly spurious mapping\"]}]}]", gene1Json);

            var gene2Json = ngaReader.GetAnnotation("gene2");
            Assert.Equal("[{\"mimNumber\":124,\"geneName\":\"gene name 2\",\"phenotypes\":[{\"phenotype\":\"disease 2\",\"description\":\"COVID-19\",\"mapping\":\"chromosome deletion or duplication syndrome\",\"inheritances\":[\"whatever\",\"never-ever\"],\"comments\":[\"nondiseases\"]}]}]", gene2Json);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Reads and validates the file header from <c>_reader</c>: the identifying string, the schema
        /// version, the creation time and the data source version, followed by the guard check.
        /// </summary>
        /// <exception cref="GeneralException">
        /// Thrown when the header string or the schema version does not match <c>OmimDatabaseCommon</c>.
        /// </exception>
        private void ReadHeader()
        {
            var header = _reader.ReadString();

            if (header != OmimDatabaseCommon.DataHeader)
            {
                throw new GeneralException("Unrecognized header in OMIM database");
            }

            var schema = _reader.ReadUInt16();

            if (schema != OmimDatabaseCommon.SchemaVersion)
            {
                // the message previously referred to the "Custom interval database", although the
                // schema being checked here is the OMIM one
                throw new GeneralException(
                          $"OMIM database schema mismatch. Expected {OmimDatabaseCommon.SchemaVersion}, observed {schema}");
            }

            _creationTime = _reader.ReadInt64();

            DataVersion = new DataSourceVersion(_reader);

            CheckGuard();
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Reads the file header from <c>_reader</c>, validates it against <c>PhylopCommon</c>, loads the
        /// chromosome intervals and marks the reader as initialized.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// Thrown when the header string or the schema version is not the expected one. A data version
        /// mismatch only produces a console warning.
        /// </exception>
        private void LoadHeader()
        {
            // identifying string
            var fileHeader = _reader.ReadString();
            if (fileHeader != PhylopCommon.Header)
            {
                throw new InvalidDataException("Unrecognized file header: " + fileHeader);
            }

            // schema version: a mismatch is fatal
            var observedSchemaVersion = _reader.ReadInt16();
            if (observedSchemaVersion != PhylopCommon.SchemaVersion)
            {
                throw new InvalidDataException("Expected phylop schema version:" + PhylopCommon.SchemaVersion + " observed schema version: " + observedSchemaVersion);
            }

            // data version: a mismatch is reported but tolerated
            var observedDataVersion = _reader.ReadInt16();
            if (observedDataVersion != PhylopCommon.DataVersion)
            {
                Console.WriteLine("WARNING: Expected phylop data version:" + PhylopCommon.DataVersion + " observed data version: " + observedDataVersion);
            }

            _genomeAssembly = (GenomeAssembly)_reader.ReadByte();
            _version        = DataSourceVersion.Read(_reader);

            // skip the reference name
            _reader.ReadString();

            _intervalListPosition = _reader.ReadInt64();

            CheckGuard();
            LoadChromosomeIntervals();

            IsInitialized = true;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Loads the compressed content of <paramref name="stream"/> into memory, decompresses it with
        /// Zstandard and reads the header fields (version, JSON key, isArray flag, schema version).
        /// </summary>
        /// <param name="stream">Stream positioned at the start of the compressed data.</param>
        /// <exception cref="UserErrorException">
        /// Thrown when the schema version in the file differs from <c>SaCommon.SchemaVersion</c>.
        /// </exception>
        public NgaReader(Stream stream)
        {
            _nsaStream = stream;
            // read the whole file. Currently they are well under 2MB
            var compressedBytes   = new byte[2 * 1024 * 1024];
            var decompressedBytes = new byte[20 * 1024 * 1024];

            // Stream.Read may return fewer bytes than requested even before the end of the stream,
            // so keep reading until the stream is exhausted or the buffer is full.
            var compressedSize = 0;
            int numBytesRead;
            while (compressedSize < compressedBytes.Length &&
                   (numBytesRead = _nsaStream.Read(compressedBytes, compressedSize, compressedBytes.Length - compressedSize)) > 0)
            {
                compressedSize += numBytesRead;
            }

            var zstd             = new Zstandard();
            var decompressedSize = zstd.Decompress(compressedBytes, compressedSize, decompressedBytes, decompressedBytes.Length);

            _memStream = new MemoryStream(decompressedBytes, 0, decompressedSize);
            _reader    = new ExtendedBinaryReader(_memStream);

            Version  = DataSourceVersion.Read(_reader);
            JsonKey  = _reader.ReadAsciiString();
            _isArray = _reader.ReadBoolean();
            ushort schemaVersion = _reader.ReadOptUInt16();

            if (schemaVersion != SaCommon.SchemaVersion)
            {
                throw new UserErrorException($"Expected schema version: {SaCommon.SchemaVersion}, observed: {schemaVersion} for {JsonKey}");
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Builds an in-memory NSI stream holding six <c>LcrInterval</c> entries (three on Chr1, three on
        /// Chr2) and returns it rewound to position 0.
        /// </summary>
        private Stream GetNsiStream()
        {
            var nsiStream = new MemoryStream();
            var version   = new DataSourceVersion("test", "June_2020", DateTime.Now.Ticks, "dummy");

            using (var nsiWriter = new NsiWriter(nsiStream, version, GenomeAssembly.GRCh37, SaCommon.LowComplexityRegionTag, ReportFor.AllVariants, SaCommon.NsiSchemaVersion, true))
            {
                // the same three ranges are registered on both chromosomes
                var intervals = new []
                {
                    new LcrInterval(ChromosomeUtilities.Chr1, 100, 150),
                    new LcrInterval(ChromosomeUtilities.Chr1, 300, 450),
                    new LcrInterval(ChromosomeUtilities.Chr1, 600, 650),
                    new LcrInterval(ChromosomeUtilities.Chr2, 100, 150),
                    new LcrInterval(ChromosomeUtilities.Chr2, 300, 450),
                    new LcrInterval(ChromosomeUtilities.Chr2, 600, 650),
                };

                nsiWriter.Write(intervals);
            }

            // rewind so callers can read from the beginning
            nsiStream.Position = 0;
            return nsiStream;
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Creates an <c>NsiWriter</c> over <paramref name="nsiStream"/> using <c>SaCommon.SchemaVersion</c>
 /// as the schema version.
 /// </summary>
 public static NsiWriter GetNsiWriter(Stream nsiStream, DataSourceVersion version, GenomeAssembly assembly, string jsonTag, ReportFor reportFor)
 {
     var nsiWriter = new NsiWriter(nsiStream, version, assembly, jsonTag, reportFor, SaCommon.SchemaVersion);
     return nsiWriter;
 }
Ejemplo n.º 19
0
        /// <summary>
        /// Creates an <c>NgaWriter</c> over <paramref name="ngaStream"/>. The data source version is built
        /// from the parser's JSON tag, <paramref name="dataVersion"/> and the current time.
        /// </summary>
        public static NgaWriter GetNgaWriter(Stream ngaStream, GeneAnnotationsParser parser, string dataVersion)
        {
            var releaseTicks      = DateTime.Now.Ticks;
            var dataSourceVersion = new DataSourceVersion(parser.JsonTag, dataVersion, releaseTicks);
            var ngaWriter         = new NgaWriter(ngaStream, dataSourceVersion, parser.JsonTag, SaCommon.SchemaVersion, false);

            return ngaWriter;
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Creates a ClinVar TSV writer backed by a new <c>SaTsvWriter</c> for the given output directory,
 /// version and genome assembly, and echoes the data source version to the console.
 /// </summary>
 public ClinvarTsvWriter(DataSourceVersion version, string outputDirectory, GenomeAssembly genomeAssembly, ISequenceProvider sequenceProvider)
     : this(new SaTsvWriter(
                outputDirectory,
                version,
                genomeAssembly.ToString(),
                SaTsvCommon.ClinvarSchemaVersion,
                InterimSaCommon.ClinvarTag,
                InterimSaCommon.ClinvarVcfTag,
                false,
                sequenceProvider,
                true))
 {
     var versionText = version.ToString();
     Console.WriteLine(versionText);
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Round-trip test: extracts the ExAC items from a multi-allelic VCF line, writes them to a
        /// supplementary annotation file and verifies that the values read back match what was written.
        /// </summary>
        public void ReadAndWriteExacWithMultipleAlleles()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // create our expected data source versions
                var exacVersion = new DataSourceVersion("ExAC", "0.3.1", DateTime.Parse("2016-03-16").Ticks);

                var expectedDataSourceVersions = new List <DataSourceVersion> {
                    exacVersion
                };

                // create our expected supplementary annotations: note AN_adj is modified in this
                const string vcfline =
                    "19	3121452	.	TA	T,TAA	17262.47	AC_Adj0_Filter	AC=6,9;AC_AFR=0,0;AC_AMR=0,0;AC_Adj=0,0;AC_EAS=0,0;AC_FIN=0,0;AC_Het=0,0,0;AC_Hom=0,0;AC_NFE=0,0;AC_OTH=0,0;AC_SAS=0,0;AF=4.587e-03,6.881e-03;AN=1308;AN_AFR=0;AN_AMR=0;AN_Adj=3;AN_EAS=0;AN_FIN=0;AN_NFE=0;AN_OTH=0;AN_SAS=0;BaseQRankSum=0.437;DP=2838";

                var sa        = new SupplementaryAnnotationPosition(3121453);
                var saCreator = new SupplementaryPositionCreator(sa);

                var exacReader      = new ExacReader(_renamer);
                var additionalItems = new List <SupplementaryDataItem>();

                foreach (var exacItem in exacReader.ExtractItems(vcfline))
                {
                    var currentItem = exacItem.SetSupplementaryAnnotations(saCreator);
                    additionalItems.Add(currentItem);
                }
                var currentSa        = new SupplementaryAnnotationPosition(3121453);
                var currentSaCreator = new SupplementaryPositionCreator(currentSa);

                foreach (var exacItem in additionalItems)
                {
                    exacItem.SetSupplementaryAnnotations(currentSaCreator);
                }

                // write the supplementary annotation file
                using (
                    var writer = new SupplementaryAnnotationWriter(randomPath, "chr19",
                                                                   expectedDataSourceVersions))
                {
                    writer.Write(currentSaCreator, currentSa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation stored at the written position
                    var observedAnnotation1 = reader.GetAnnotation(3121453) as SupplementaryAnnotationPosition;

                    Assert.NotNull(observedAnnotation1);

                    // we want to make sure we are reading the values we have written ("iA" allele)

                    var expExaciA =
                        currentSa.AlleleSpecificAnnotations["iA"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(expExaciA);
                    var obsExaciA =
                        observedAnnotation1.AlleleSpecificAnnotations["iA"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(obsExaciA);

                    Assert.Equal(expExaciA.ExacAllAn, obsExaciA.ExacAllAn);

                    Assert.Equal(expExaciA.ExacCoverage, obsExaciA.ExacCoverage);
                    Assert.Equal(expExaciA.ExacAllAc, obsExaciA.ExacAllAc);
                    Assert.NotNull(obsExaciA.ExacAllAc);
                    Assert.Null(obsExaciA.ExacFinAc);
                    Assert.Null(obsExaciA.ExacFinAn);

                    // we want to make sure we are reading the values we have written ("1" allele)

                    var expExac1 =
                        currentSa.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(expExac1);
                    var obsExac1 =
                        observedAnnotation1.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(obsExac1);

                    Assert.Equal(expExac1.ExacAllAn, obsExac1.ExacAllAn);

                    Assert.Equal(expExac1.ExacCoverage, obsExac1.ExacCoverage);
                    Assert.Equal(expExac1.ExacAllAc, obsExac1.ExacAllAc);
                    Assert.NotNull(obsExac1.ExacAllAc);
                    Assert.Null(obsExac1.ExacFinAc);
                    Assert.Null(obsExac1.ExacFinAn);
                }
            }
            finally
            {
                // clean up even when an assertion fails; File.Delete does not throw for missing files
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Round-trip test (NIR-1359): writes a supplementary annotation file that carries one
        /// supplementary interval and verifies the interval read back from it.
        /// </summary>
        public void ReadWriteWithSuppIntervals()
        {
            // NIR-1359
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // create our expected data source versions
                var dbSnpVersion = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);

                var expectedDataSourceVersions = new List <DataSourceVersion> {
                    dbSnpVersion
                };

                // This is the case where Nirvana throws an error: Too many bytes in what should have been a 7 bit encoded Int32.

                var sa        = new SupplementaryAnnotationPosition(5073770);
                var saCreator = new SupplementaryPositionCreator(sa);

                // adding a supplementary interval
                var intValues    = new Dictionary <string, int>();
                var doubleValues = new Dictionary <string, double>();
                var freqValues   = new Dictionary <string, double>();
                var stringValues = new Dictionary <string, string>();
                var boolValues   = new List <string>();

                var suppInterval = new SupplementaryInterval(5073770, 5073970, "chr1", "<DUP>", VariantType.duplication, "ClinVar", _renamer, intValues,
                                                             doubleValues, freqValues, stringValues, boolValues);

                suppInterval.AddStringValue("ID", "RandomClin001");

                // the above code was unit tested in MergeDbSnpClinVar()
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr9", expectedDataSourceVersions))
                {
                    writer.SetIntervalList(new List <SupplementaryInterval> {
                        suppInterval
                    });
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // read the stored intervals
                    var suppIntervals = reader.GetSupplementaryIntervals(_renamer).ToList();
                    Assert.Equal(1, suppIntervals.Count);

                    foreach (var interval in suppIntervals)
                    {
                        Assert.Equal(5073770, interval.Start);
                        Assert.Equal(5073970, interval.End);
                        Assert.Equal("<DUP>", interval.AlternateAllele);
                        Assert.Equal("ClinVar", interval.Source);
                        Assert.Equal("duplication", interval.VariantType.ToString());

                        // only the "ID" and "vid" entries are checked; other keys are ignored
                        foreach (var keyValuePair in interval.StringValues)
                        {
                            if (keyValuePair.Key == "ID")
                            {
                                Assert.Equal("RandomClin001", keyValuePair.Value);
                            }
                            if (keyValuePair.Key == "vid")
                            {
                                Assert.Equal("1:5073770:5073970", keyValuePair.Value);
                            }
                        }
                    }
                }
            }
            finally
            {
                // clean up even when an assertion fails; File.Delete does not throw for missing files
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Creates the writer through the overload that takes no sequence provider, then stores
 /// <paramref name="sequenceProvider"/> in <c>_sequenceProvider</c>.
 /// </summary>
 public SaTsvWriter(string outputDir, DataSourceVersion dataSourceVersion, string assembly, int schemaVersion, string jsonKey, string vcfKeys,
                    bool isAlleleSpecific, ISequenceProvider sequenceProvider, bool isArray = false)
     : this(outputDir,
            dataSourceVersion,
            assembly,
            schemaVersion,
            jsonKey,
            vcfKeys,
            isAlleleSpecific,
            isArray)
 {
     _sequenceProvider = sequenceProvider;
 }
Ejemplo n.º 24
0
        //todo: filter chromIndex=ushort.Max
        /// <summary>
        /// Sets up the writer: keeps the data writer and its base stream, records the behaviour flags,
        /// creates a Zstandard-backed <c>NsaBlock</c> of <paramref name="blockSize"/>, creates the
        /// <c>ChunkedIndex</c> on <paramref name="indexWriter"/> and allocates the in-memory scratch
        /// buffer together with its stream and writer.
        /// </summary>
        public NsaWriter(ExtendedBinaryWriter writer, ExtendedBinaryWriter indexWriter, DataSourceVersion version, ISequenceProvider refProvider, string jsonKey, bool matchByAllele, bool isArray, int schemaVersion, bool isPositional, bool skipIncorrectRefEntries = true, bool throwErrorOnConflicts = false, int blockSize = SaCommon.DefaultBlockSize)
        {
            // output target
            _stream = writer.BaseStream;
            _writer = writer;

            // behaviour flags
            _isPositional            = isPositional;
            _skipIncorrectRefEntries = skipIncorrectRefEntries;
            _throwErrorOnConflicts   = throwErrorOnConflicts;

            // block storage and reference access
            var compressor = new Zstandard();
            _block         = new NsaBlock(compressor, blockSize);
            _refProvider   = refProvider;

            // index, created for the assembly reported by the reference provider
            var assembly = refProvider.Assembly;
            _index       = new ChunkedIndex(indexWriter, assembly, version, jsonKey, matchByAllele, isArray, schemaVersion, isPositional);

            // fixed-size scratch buffer wrapped by a stream and a binary writer
            const int scratchSize = short.MaxValue * 2;
            _memBuffer = new byte[scratchSize];
            _memStream = new MemoryStream(_memBuffer);
            _memWriter = new ExtendedBinaryWriter(_memStream);
        }
Ejemplo n.º 25
0
 /// <summary>
 /// Echoes the data source version to the console and creates the underlying <c>SaTsvWriter</c>
 /// for the rCRS assembly with <paramref name="mitoMapDataType"/> as its JSON key argument.
 /// </summary>
 public MitoMapVarTsvWriter(DataSourceVersion version, string outputDirectory, string mitoMapDataType, ISequenceProvider sequenceProvider)
 {
     var versionText = version.ToString();
     Console.WriteLine(versionText);

     var assemblyName  = GenomeAssembly.rCRS.ToString();
     _mitoMapVarWriter = new SaTsvWriter(
         outputDirectory,
         version,
         assemblyName,
         SaTsvCommon.MitoMapSchemaVersion,
         mitoMapDataType,
         null,
         false,
         sequenceProvider,
         true);
 }
Ejemplo n.º 26
0
        /// <summary>
        /// Builds a supplementary annotation position from one dbSNP line and one ExAC line,
        /// writes it to a temporary supplementary annotation file, reads it back and checks
        /// that the dbSNP and ExAC values for alleles A and C survive the round trip.
        /// </summary>
        public void ReadWriteExacDbsnp()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            // create our expected data source versions
            var dbSnpVersion  = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var cosmicVersion = new DataSourceVersion("COSMIC", "GRCh37_v71", DateTime.Parse("2014-10-21").Ticks);

            var expectedDataSourceVersions = new List<DataSourceVersion> {
                dbSnpVersion, cosmicVersion
            };

            // create our expected supplementary annotations
            const string vcfLine1 = "2	48010488	rs1042821	G	A	.	.	RS=1042821;RSPOS=48010488;RV;dbSNPBuildID=86;SSR=0;SAO=1;VP=0x050168420a05150136100100;GENEINFO=MSH6:2956;WGT=1;VC=SNV;PM;PMC;SLO;NSM;REF;U5;R5;ASP;VLD;G5;GNO;KGPhase1;KGPhase3;LSD;CAF=0.7991,0.2009;COMMON=1";
            const string vcfLine2 =
                "2	48010488	rs1042821	G	A,C	14068898.15	PASS	AC=21019,1;AC_AFR=1700,0;AC_AMR=1015,1;AC_Adj=19510,1;AC_EAS=1973,0;AC_FIN=743,0;AC_Het=15722,1,0;AC_Hom=1894,0;AC_NFE=10593,0;AC_OTH=147,0;AC_SAS=3339,0;AF=0.178,8.487e-06;AN=117830;AN_AFR=6388;AN_AMR=9014;AN_Adj=91130;AN_EAS=6792;AN_FIN=5078;AN_NFE=48404;AN_OTH=664;AN_SAS=14790;BaseQRankSum=-4.850e-01;ClippingRankSum=-1.400e-01;DB;DP=1206681;FS=0.000;GQ_MEAN=129.86;GQ_STDDEV=221.88;Het_AFR=1322,0,0;Het_AMR=931,1,0;Het_EAS=1511,0,0;Het_FIN=665,0,0;Het_NFE=8585,0,0;Het_OTH=111,0,0;Het_SAS=2597,0,0;Hom_AFR=189,0;Hom_AMR=42,0;Hom_EAS=231,0;Hom_FIN=39,0;Hom_NFE=1004,0;Hom_OTH=18,0;Hom_SAS=371,0;InbreedingCoeff=0.0376;MQ=60.00;MQ0=0;MQRankSum=0.00;NCC=3737;POSITIVE_TRAIN_SITE;QD=17.46;ReadPosRankSum=0.181;VQSLOD=5.87;culprit=MQ;DP_HIST=3051|9435|11318|5521|9711|11342|4131|1270|615|404|328|266|264|262|196|186|126|115|97|277,133|968|2180|3402|3564|2815|1772|954|551|389|321|263|261|261|196|186|126|115|97|277,0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=949|2966|347|492|15135|1435|1335|854|421|526|590|416|13672|1951|445|462|255|174|211|16279,24|79|81|124|135|96|110|118|97|180|228|137|182|191|126|171|180|151|192|16229,0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1";

            var sa        = new SupplementaryAnnotationPosition(48010488);
            var saCreator = new SupplementaryPositionCreator(sa);

            var dbsnpReader = new DbSnpReader(_renamer);
            var dbSnpItem1  = dbsnpReader.ExtractItem(vcfLine1)[0];

            dbSnpItem1.SetSupplementaryAnnotations(saCreator);

            var exacReader = new ExacReader(_renamer);

            foreach (var exacItem in exacReader.ExtractItems(vcfLine2))
            {
                exacItem.SetSupplementaryAnnotations(saCreator);
            }

            try
            {
                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr2", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation stored at the position we just wrote
                    var observedAnnotation1 = reader.GetAnnotation(48010488) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);


                    var expDbSnpA =
                        sa.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
                    var obsDbSnpA =
                        observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
                    Assert.NotNull(expDbSnpA);
                    Assert.NotNull(obsDbSnpA);

                    // we want to make sure we are reading the values we have written
                    Assert.Equal(expDbSnpA.DbSnp, obsDbSnpA.DbSnp);


                    var expExacA =
                        sa.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
                    var obsExacA =
                        observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;

                    Assert.NotNull(expExacA);
                    Assert.NotNull(obsExacA);

                    Assert.Equal(expExacA.ExacAllAn, obsExacA.ExacAllAn);
                    Assert.Equal(expExacA.ExacCoverage, obsExacA.ExacCoverage);
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAllAc), Convert.ToDouble(obsExacA.ExacAllAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAfrAc), Convert.ToDouble(obsExacA.ExacAfrAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAmrAc), Convert.ToDouble(obsExacA.ExacAmrAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacEasAc), Convert.ToDouble(obsExacA.ExacEasAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacFinAc), Convert.ToDouble(obsExacA.ExacFinAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacNfeAc), Convert.ToDouble(obsExacA.ExacNfeAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacOthAc), Convert.ToDouble(obsExacA.ExacOthAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacSasAc), Convert.ToDouble(obsExacA.ExacSasAc));


                    var expExacC =
                        sa.AlleleSpecificAnnotations["C"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
                    var obsExacC =
                        observedAnnotation1.AlleleSpecificAnnotations["C"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;

                    Assert.NotNull(expExacC);
                    Assert.NotNull(obsExacC);

                    Assert.Equal(expExacC.ExacCoverage, obsExacC.ExacCoverage);
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAllAc), Convert.ToDouble(obsExacC.ExacAllAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAfrAc), Convert.ToDouble(obsExacC.ExacAfrAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAmrAc), Convert.ToDouble(obsExacC.ExacAmrAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacEasAc), Convert.ToDouble(obsExacC.ExacEasAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacFinAc), Convert.ToDouble(obsExacC.ExacFinAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacNfeAc), Convert.ToDouble(obsExacC.ExacNfeAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacOthAc), Convert.ToDouble(obsExacC.ExacOthAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacSasAc), Convert.ToDouble(obsExacC.ExacSasAc));
                }
            }
            finally
            {
                // always clean up the temporary files, even when an assertion above fails;
                // File.Delete is a no-op when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Merges one dbSNP, one 1000 Genomes and one EVS line for the same position into a
        /// supplementary annotation, writes it to a temporary file, reads it back and checks
        /// the header data source versions plus the round-tripped annotation values.
        /// </summary>
        public void ReadAndWriteDbSnp1KgEvs()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            // create our expected data source versions
            var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
            var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

            var expectedDataSourceVersions = new List<DataSourceVersion> {
                dbSnpVersion, oneKGenVersion, evsDataSource
            };

            // create our expected supplementary annotations
            const string vcfLine1 = "1	69428	rs140739101	T	G	.	.	RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
            const string vcfLine2 = "1	69428	rs140739101	T	G	100	PASS	AC=95;AF=0.0189696;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||";
            const string vcfLine3 = "1	69428	rs140739101	T	G	.	PASS	BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

            const string altAllele = "G";
            var          sa        = new SupplementaryAnnotationPosition(69428);
            var          saCreator = new SupplementaryPositionCreator(sa);

            var dbsnpReader = new DbSnpReader(_renamer);
            var dbSnpItem   = dbsnpReader.ExtractItem(vcfLine1)[0];

            dbSnpItem.SetSupplementaryAnnotations(saCreator);

            var oneKGenReader = new OneKGenReader(_renamer);
            var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];

            oneKGenItem.SetSupplementaryAnnotations(saCreator);

            var evsReader = new EvsReader(_renamer);
            var evsItem   = evsReader.ExtractItems(vcfLine3)[0];

            evsItem.SetSupplementaryAnnotations(saCreator);

            // the preceding code has been unit tested in  MergeDbSnp1kpEvs()

            try
            {
                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    var observedDataSourceVersions = reader.Header.DataSourceVersions;

                    // check the data source versions (expected value first, as xUnit requires)
                    Assert.Equal(3, observedDataSourceVersions.Count);

                    var observedDataSourceVersion = observedDataSourceVersions[0];
                    Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
                    Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
                    Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

                    // extract the annotation stored at the position we just wrote
                    var observedAnnotation1 = reader.GetAnnotation(69428) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);

                    var expDbSnp =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)]
                        as DbSnpAnnotation;
                    Assert.NotNull(expDbSnp);

                    var expOneKg =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]
                        as OneKGenAnnotation;
                    Assert.NotNull(expOneKg);

                    var expEvs =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]
                        as EvsAnnotation;
                    Assert.NotNull(expEvs);

                    var obsDbSnp = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)]
                                   as DbSnpAnnotation;
                    Assert.NotNull(obsDbSnp);

                    var obsOneKg = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]
                                   as OneKGenAnnotation;
                    Assert.NotNull(obsOneKg);

                    var obsEvs = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]
                                 as EvsAnnotation;
                    Assert.NotNull(obsEvs);

                    Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);

                    Assert.Equal(expEvs.EvsAll, obsEvs.EvsAll);
                    Assert.Equal(expOneKg.OneKgAllAc, obsOneKg.OneKgAllAc);

                    Assert.Equal(expEvs.EvsCoverage, obsEvs.EvsCoverage);
                    Assert.Equal(expEvs.NumEvsSamples, obsEvs.NumEvsSamples);
                }
            }
            finally
            {
                // always clean up the temporary files, even when an assertion above fails;
                // File.Delete is a no-op when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Merges one dbSNP item and two COSMIC items that share the same COSMIC ID into a
        /// supplementary annotation, writes it to a temporary file, reads it back and checks
        /// that the dbSNP value and the single merged COSMIC entry survive the round trip.
        /// </summary>
        public void ReadWriteDbSnpCosmic()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            // create our expected data source versions
            var dbSnpVersion  = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var cosmicVersion = new DataSourceVersion("COSMIC", "GRCh37_v71", DateTime.Parse("2014-10-21").Ticks);

            var expectedDataSourceVersions = new List<DataSourceVersion> {
                dbSnpVersion, cosmicVersion
            };

            // create our expected supplementary annotations
            const string vcfLine1 = "1	10228	rs143255646	TA	T	.	.	RS=143255646;RSPOS=10229;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP";

            var sa        = new SupplementaryAnnotationPosition(10229);
            var saCreator = new SupplementaryPositionCreator(sa);

            var dbsnpReader     = new DbSnpReader(_renamer);
            var dbSnpItem1      = dbsnpReader.ExtractItem(vcfLine1)[0];
            var additionalItems = new List<SupplementaryDataItem>
            {
                dbSnpItem1.SetSupplementaryAnnotations(saCreator)
            };

            var cosmicItem1 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
                                             new HashSet<CosmicItem.CosmicStudy> {
                new CosmicItem.CosmicStudy("", "carcinoma", "oesophagus")
            }, null);
            var cosmicItem2 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
                                             new HashSet<CosmicItem.CosmicStudy> {
                new CosmicItem.CosmicStudy("01", "carcinoma", "large_intestine")
            }, null);

            additionalItems.Add(cosmicItem1.SetSupplementaryAnnotations(saCreator));
            additionalItems.Add(cosmicItem2.SetSupplementaryAnnotations(saCreator));

            // apply the items returned by the first pass as well
            foreach (var item in additionalItems)
            {
                item.SetSupplementaryAnnotations(saCreator);
            }

            Assert.Equal(1, sa.CosmicItems.Count);
            // the preceding code has been unit tested in  MergeDbSnpCosmic()

            try
            {
                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation stored at the position we just wrote
                    var observedAnnotation1 = reader.GetAnnotation(10229) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);

                    var expDbSnp =
                        sa.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as
                        DbSnpAnnotation;
                    Assert.NotNull(expDbSnp);
                    var obsDbSnp =
                        observedAnnotation1.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as
                        DbSnpAnnotation;
                    Assert.NotNull(obsDbSnp);

                    Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);
                    Assert.True(observedAnnotation1.ContainsCosmicId(sa.CosmicItems[0].ID));
                    Assert.Equal(1, observedAnnotation1.CosmicItems.Count);
                }
            }
            finally
            {
                // always clean up the temporary files, even when an assertion above fails;
                // File.Delete is a no-op when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Initializes the index, stores its metadata on the instance and immediately writes
        /// the metadata fields to <paramref name="indexWriter"/>.
        /// </summary>
        /// <param name="indexWriter">writer that receives the index; kept for later use</param>
        /// <param name="assembly">genome assembly, written as a single byte</param>
        /// <param name="version">data source version, serialized via its own <c>Write</c> method</param>
        /// <param name="jsonKey">JSON key, written with <c>WriteOptAscii</c></param>
        /// <param name="matchByAllele">stored and written as a boolean</param>
        /// <param name="isArray">stored and written as a boolean</param>
        /// <param name="schemaVersion">written with <c>WriteOpt</c>; not stored by this constructor</param>
        /// <param name="isPositional">stored and written as a boolean</param>
        public ChunkedIndex(ExtendedBinaryWriter indexWriter, GenomeAssembly assembly, DataSourceVersion version, string jsonKey, bool matchByAllele, bool isArray, int schemaVersion, bool isPositional)
        {
            _writer      = indexWriter;
            _chromChunks = new Dictionary<ushort, List<Chunk>>();

            // metadata kept on the instance
            Assembly      = assembly;
            Version       = version;
            JsonKey       = jsonKey;
            MatchByAllele = matchByAllele;
            IsArray       = isArray;
            IsPositional  = isPositional;

            // the sequence of these writes determines the byte order in the output,
            // so it must stay exactly as is
            indexWriter.Write((byte)assembly);
            version.Write(indexWriter);
            indexWriter.WriteOptAscii(jsonKey);
            indexWriter.Write(matchByAllele);
            indexWriter.Write(isArray);
            indexWriter.WriteOpt(schemaVersion);
            indexWriter.Write(isPositional);
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Merges multi-allelic dbSNP, 1000 Genomes and EVS lines for the same position into
        /// a supplementary annotation, writes it to a temporary file, reads it back and checks
        /// the header data source versions plus the per-allele values for the insertion
        /// ("iG") and deletion ("1") alleles.
        /// </summary>
        public void MultiAlleleMergeDbSnp1KpEvsSaRw()
        {
            // create our expected data source versions
            var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
            var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

            var expectedDataSourceVersions = new List<DataSourceVersion> {
                dbSnpVersion, oneKGenVersion, evsDataSource
            };

            // create our expected supplementary annotations
            const string vcfLine1 = "1	1564952	rs112177324	TG	T	.	.	RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
            const string vcfLine2 = "1	1564952	rs112177324	TG	TGG,T	100	PASS	AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
            const string vcfLine3 = "1	1564952	rs112177324	TG	TGG,T	.	PASS	BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

            var sa        = new SupplementaryAnnotationPosition(1564953);
            var saCreator = new SupplementaryPositionCreator(sa);

            var dbsnpReader     = new DbSnpReader(_renamer);
            var dbSnpItem       = dbsnpReader.ExtractItem(vcfLine1)[0];
            var additionalItems = new List<SupplementaryDataItem>
            {
                dbSnpItem.SetSupplementaryAnnotations(saCreator)
            };

            var oneKGenReader = new OneKGenReader(_renamer);
            var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];

            additionalItems.Add(oneKGenItem.SetSupplementaryAnnotations(saCreator));

            var evsReader    = new EvsReader(_renamer);
            var evsItemsList = evsReader.ExtractItems(vcfLine3);

            foreach (var evsItem in evsItemsList)
            {
                additionalItems.Add(evsItem.SetSupplementaryAnnotations(saCreator));
            }

            // apply the items returned by the first pass as well
            foreach (var item in additionalItems)
            {
                item.SetSupplementaryAnnotations(saCreator);
            }

            // write the supplementary annotation file
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    var observedDataSourceVersions = reader.Header.DataSourceVersions;

                    // check the data source versions (expected value first, as xUnit requires)
                    Assert.Equal(3, observedDataSourceVersions.Count);

                    var observedDataSourceVersion = observedDataSourceVersions[0];
                    Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
                    Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
                    Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

                    // checking the global alleles
                    Assert.Null(sa.GlobalMajorAllele);
                    Assert.Null(sa.GlobalMajorAlleleFrequency);
                    Assert.Null(sa.GlobalMinorAllele);
                    Assert.Null(sa.GlobalMinorAlleleFrequency);

                    // extract the annotation stored at the position we just wrote
                    var observedAnnotation = reader.GetAnnotation(1564953) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation);

                    var expectedInsOneKgAllAc = ((OneKGenAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc;
                    var expectedDelHasOneKg   = sa.AlleleSpecificAnnotations["1"].HasDataSource(DataSourceCommon.DataSource.OneKg);

                    var expectedInsEvsAfr = ((EvsAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr;

                    var expectedInsHasDbSnp = sa.AlleleSpecificAnnotations["iG"].HasDataSource(DataSourceCommon.DataSource.DbSnp);

                    var obsAsaIns = observedAnnotation.AlleleSpecificAnnotations["iG"];
                    var obsAsaDel = observedAnnotation.AlleleSpecificAnnotations["1"];

                    Assert.Equal(expectedInsOneKgAllAc, ((OneKGenAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc);
                    Assert.Equal(expectedDelHasOneKg, obsAsaDel.HasDataSource(DataSourceCommon.DataSource.OneKg));

                    Assert.Equal(expectedInsEvsAfr, ((EvsAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr);
                    Assert.Equal(expectedInsHasDbSnp, obsAsaIns.HasDataSource(DataSourceCommon.DataSource.DbSnp));
                }
            }
            finally
            {
                // always clean up the temporary files, even when an assertion above fails;
                // File.Delete is a no-op when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }