public void Gene_in_gene_annotation_database_get_annotated()
        {
            // Round-trip check: write one annotated gene through GeneDatabaseWriter, read it back
            // through GeneDatabaseReader/GeneAnnotationProvider, and verify that only the written
            // gene symbol yields an annotation.

            // Fixture: two JSON fragments attached to a single "omim" source for gene "A2M".
            var omimJsonStrings = new[]
            {
                "{\"mimNumber\":103950,\"description\":\"Alpha-2-macroglobulin\",\"phenotypes\":[{\"mimNumber\":614036,\"phenotype\":\"Alpha-2-macroglobulin deficiency\",\"mapping\":\"mapping of the wildtype gene\",\"inheritances\":[\"Autosomal dominant\"]}",
                "{\"mimNumber\":104300,\"phenotype\":\"Alzheimer disease, susceptibility to\",\"mapping\":\"molecular basis of the disorder is known\",\"inheritances\":[\"Autosomal dominant\"],\"comments\":\"contribute to susceptibility to multifactorial disorders or to susceptibility to infection\"}]}"
            };
            var omimSource = new GeneAnnotationSource("omim", omimJsonStrings, true);
            var gene       = new AnnotatedGene("A2M", new IGeneAnnotationSource[] { omimSource });

            var stream     = new MemoryStream();
            var noVersions = new IDataSourceVersion[] { };
            var dbHeader   = new SupplementaryAnnotationHeader("", DateTime.Now.Ticks, 1, noVersions, GenomeAssembly.Unknown);

            // NOTE(review): the trailing 'true' presumably keeps the stream open after the writer
            // is disposed (the stream is rewound and read below) -- confirm against GeneDatabaseWriter.
            using (var dbWriter = new GeneDatabaseWriter(stream, dbHeader, true))
            {
                dbWriter.Write(gene);
            }

            // Rewind so the reader starts at the beginning of what was just written.
            stream.Position = 0;
            var provider = new GeneAnnotationProvider(new GeneDatabaseReader(stream));

            var knownGeneAnnotation   = provider.Annotate("A2M");
            var unknownGeneAnnotation = provider.Annotate("A2M2L");

            Assert.NotNull(knownGeneAnnotation);
            Assert.Null(unknownGeneAnnotation);
            Assert.Single(knownGeneAnnotation.Annotations);
            Assert.Equal("omim", knownGeneAnnotation.Annotations[0].DataSource);
        }
Example #2
0
        public void SaReader_And_SaWriter_Tests()
        {
            // Round-trip check: write one SaPosition (four data sources, global major allele "A")
            // at position 150 with SaWriter, then read positions 150 and 200 back with SaReader.
            var saMs    = new MemoryStream();
            var indexMs = new MemoryStream();

            var dataSourceVersions = new[]
            {
                new DataSourceVersion("clinvar", "20", DateTime.Today.Ticks, "clinvar dataset"),
                // Built from components instead of DateTime.Parse("12/20/2010"): parsing that
                // string depends on the current culture and throws on day-first cultures.
                new DataSourceVersion("dbSnp", "18", new DateTime(2010, 12, 20).Ticks, "dbSNP")
            };
            var header = new SupplementaryAnnotationHeader("chr1", DateTime.Now.Ticks, 1, dataSourceVersions, GenomeAssembly.GRCh37);

            // One interval per ReportFor category, each handed to the writer separately.
            var smallIntervals = new List<ISupplementaryInterval>
            {
                new SupplementaryInterval("data1", "chr1", 100, 150, "", ReportFor.SmallVariants)
            };
            var svIntervals = new List<ISupplementaryInterval>
            {
                new SupplementaryInterval("data2", "chr1", 100, 1000, "", ReportFor.StructuralVariants)
            };
            var allIntervals = new List<ISupplementaryInterval>
            {
                new SupplementaryInterval("data3", "chr1", 100, 1000, "", ReportFor.AllVariants)
            };

            var saDataSources = new ISaDataSource[]
            {
                new SaDataSource("data1", "data1", "A", false, true, "acd", new[] { "\"id\":\"123\"" }),
                new SaDataSource("data2", "data2", "T", false, true, "acd", new[] { "\"id\":\"123\"" }),
                new SaDataSource("data3", "data3", "A", false, false, "acd", new[] { "\"id\":\"123\"" }),
                new SaDataSource("data4", "data4", "T", false, false, "acd", new[] { "\"id\":\"123\"" })
            };

            var saPos = new SaPosition(saDataSources, "A");

            // NOTE(review): the trailing 'true' presumably leaves both streams open after the
            // writer is disposed (they are rewound and read below) -- confirm against SaWriter.
            using (var saWriter = new SaWriter(saMs, indexMs, header, smallIntervals, svIntervals, allIntervals, new List<(int, string)>(), true))
            {
                saWriter.Write(saPos, 150);
            }

            // Rewind both streams so the reader starts from the beginning.
            saMs.Position    = 0;
            indexMs.Position = 0;

            ISaPosition observedPosition, observedPosition2;

            using (var saReader = new SaReader(saMs, indexMs))
            {
                observedPosition  = saReader.GetAnnotation(150); // the position that was written
                observedPosition2 = saReader.GetAnnotation(200); // nothing was written here
            }

            Assert.Equal("A", observedPosition.GlobalMajorAllele);
            Assert.Equal(4, observedPosition.DataSources.Length);
            Assert.Null(observedPosition2);
        }
Example #3
0
        /// <summary>
        /// Writes every gene returned by <c>GetAnnotatedGenes(geneReaders)</c> to the gene-level
        /// annotation database file inside <paramref name="outputDirectory"/>.
        /// </summary>
        /// <param name="geneReaders">Readers passed on to <c>GetAnnotatedGenes</c>.</param>
        /// <param name="geneHeaders">Headers whose data source versions are put into the database header.</param>
        /// <param name="outputDirectory">Directory in which the database file (<c>SaDataBaseCommon.GeneLevelAnnotationFileName</c>) is created.</param>
        /// <param name="assembly">Genome assembly recorded in the database header.</param>
        private static void MergeGene(IReadOnlyList<GeneTsvReader> geneReaders, IEnumerable<SaHeader> geneHeaders, string outputDirectory, GenomeAssembly assembly)
        {
            var geneAnnotationDatabasePath = Path.Combine(outputDirectory, SaDataBaseCommon.GeneLevelAnnotationFileName);

            // Build the header before touching the file system so that a failure here
            // does not leave an empty database file behind.
            var databaseHeader = new SupplementaryAnnotationHeader("", DateTime.Now.Ticks, SaDataBaseCommon.DataVersion, geneHeaders.Select(x => x.GetDataSourceVersion()), assembly);

            // The stream gets its own using block so the file handle is released even if the
            // writer's constructor throws. Disposing a stream more than once is harmless, so this
            // is safe whether or not the writer also closes the stream it was given.
            using (var geneAnnotationStream = FileUtilities.GetCreateStream(geneAnnotationDatabasePath))
            using (var writer = new GeneDatabaseWriter(geneAnnotationStream, databaseHeader))
            {
                // A null result from GetAnnotatedGenes is treated as "no genes".
                foreach (var annotatedGene in GetAnnotatedGenes(geneReaders) ?? Enumerable.Empty<IAnnotatedGene>())
                {
                    writer.Write(annotatedGene);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Merges the supplementary annotations of one reference sequence into
        /// "&lt;UCSC name&gt;.nsa" (with a ".idx" companion file) in the output directory and
        /// prints a one-line summary to the console.
        /// </summary>
        /// <param name="refName">Reference name; must be a key of <c>_refNameToChromosome</c>.</param>
        private void MergeChrom(string refName)
        {
            var timer = new Benchmark();

            var enumerators                = GetSaEnumerators(refName);
            var refMinorGlobalMajorAlleles = GetGlobalMajorAlleleForRefMinors(refName);
            var versions                   = MergeUtilities.GetDataSourceVersions(_saHeaders);
            var ucscName                   = _refNameToChromosome[refName].UcscName;

            var saHeader = new SupplementaryAnnotationHeader(ucscName, DateTime.Now.Ticks, SaDataBaseCommon.DataVersion, versions, _genomeAssembly);

            // Materialised into a list because it is filtered three times below and counted in the summary.
            var sortedIntervals = MergeUtilities.GetIntervals(_intervalReaders, refName)
                                                .OrderBy(x => x.Start)
                                                .ThenBy(x => x.End)
                                                .ToList();

            var structuralIntervals = MergeUtilities.GetSpecificIntervals(ReportFor.StructuralVariants, sortedIntervals);
            var allVariantIntervals = MergeUtilities.GetSpecificIntervals(ReportFor.AllVariants, sortedIntervals);
            var smallIntervals      = MergeUtilities.GetSpecificIntervals(ReportFor.SmallVariants, sortedIntervals);

            var nsaPath   = Path.Combine(_outputDirectory, $"{ucscName}.nsa");
            var indexPath = nsaPath + ".idx";

            var writtenCount = 0;
            int refMinorTotal;

            using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
            {
                using (var indexStream = FileUtilities.GetCreateStream(indexPath))
                {
                    using (var saWriter = new SaWriter(nsaStream, indexStream, saHeader, smallIntervals, structuralIntervals, allVariantIntervals, refMinorGlobalMajorAlleles))
                    {
                        // Pull positions until GetNextInterimPosition hands back a null SA position.
                        (int position, ISaPosition saPosition) = GetNextInterimPosition(enumerators);

                        while (saPosition != null)
                        {
                            saWriter.Write(saPosition, position);
                            writtenCount++;

                            (position, saPosition) = GetNextInterimPosition(enumerators);
                        }

                        // Read the counter before the writer is disposed.
                        refMinorTotal = saWriter.RefMinorCount;
                    }
                }
            }

            Console.WriteLine($"{ucscName,-23}  {writtenCount,10:n0}   {sortedIntervals.Count,6:n0}    {refMinorTotal,6:n0}   {timer.GetElapsedIterationTime(writtenCount, "variants", out double _)}");
        }