/// <summary>
/// Writes a single annotated gene ("A2M") to an in-memory gene database, reads it back
/// through a <c>GeneAnnotationProvider</c>, and checks that the stored gene is annotated
/// with its one "omim" source while a gene that was never written ("A2M2L") yields null.
/// </summary>
public void Gene_in_gene_annotation_database_get_annotated()
{
    // The two JSON fragments handed to the "omim" annotation source (kept exactly as stored).
    var omimJsonStrings = new[]
    {
        "{\"mimNumber\":103950,\"description\":\"Alpha-2-macroglobulin\",\"phenotypes\":[{\"mimNumber\":614036,\"phenotype\":\"Alpha-2-macroglobulin deficiency\",\"mapping\":\"mapping of the wildtype gene\",\"inheritances\":[\"Autosomal dominant\"]}",
        "{\"mimNumber\":104300,\"phenotype\":\"Alzheimer disease, susceptibility to\",\"mapping\":\"molecular basis of the disorder is known\",\"inheritances\":[\"Autosomal dominant\"],\"comments\":\"contribute to susceptibility to multifactorial disorders or to susceptibility to infection\"}]}"
    };
    var omimSource = new GeneAnnotationSource("omim", omimJsonStrings, true);
    var geneToWrite = new AnnotatedGene("A2M", new IGeneAnnotationSource[] { omimSource });

    var databaseStream = new MemoryStream();
    var databaseHeader = new SupplementaryAnnotationHeader("", DateTime.Now.Ticks, 1, new IDataSourceVersion[] { }, GenomeAssembly.Unknown);

    // The third argument is true so the stream can still be read after the writer is disposed.
    using (var databaseWriter = new GeneDatabaseWriter(databaseStream, databaseHeader, true))
    {
        databaseWriter.Write(geneToWrite);
    }

    // Rewind so the reader starts at the beginning of what was just written.
    databaseStream.Position = 0;
    var databaseReader = new GeneDatabaseReader(databaseStream);
    var provider = new GeneAnnotationProvider(databaseReader);

    var knownGeneAnnotation = provider.Annotate("A2M");
    var unknownGeneAnnotation = provider.Annotate("A2M2L");

    Assert.NotNull(knownGeneAnnotation);
    Assert.Null(unknownGeneAnnotation);
    Assert.Single(knownGeneAnnotation.Annotations);
    Assert.Equal("omim", knownGeneAnnotation.Annotations[0].DataSource);
}
/// <summary>
/// Round-trips one <c>SaPosition</c> (four data sources, global major allele "A") through
/// <c>SaWriter</c> and <c>SaReader</c> using in-memory data and index streams. The position
/// written at 150 must come back intact; position 200, which was never written, must be null.
/// </summary>
public void SaReader_And_SaWriter_Tests()
{
    var saMs = new MemoryStream();
    var indexMs = new MemoryStream();

    var dataSourceVersions = new[]
    {
        new DataSourceVersion("clinvar", "20", DateTime.Today.Ticks, "clinvar dataset"),
        // Built from explicit components instead of DateTime.Parse("12/20/2010"): Parse uses the
        // current thread culture and throws FormatException on day-first cultures (month "20").
        new DataSourceVersion("dbSnp", "18", new DateTime(2010, 12, 20).Ticks, "dbSNP")
    };

    var header = new SupplementaryAnnotationHeader("chr1", DateTime.Now.Ticks, 1, dataSourceVersions, GenomeAssembly.GRCh37);

    // One interval per reporting category.
    var smallIntervals = new List<ISupplementaryInterval>
    {
        new SupplementaryInterval("data1", "chr1", 100, 150, "", ReportFor.SmallVariants)
    };
    var svIntervals = new List<ISupplementaryInterval>
    {
        new SupplementaryInterval("data2", "chr1", 100, 1000, "", ReportFor.StructuralVariants)
    };
    var allIntervals = new List<ISupplementaryInterval>
    {
        new SupplementaryInterval("data3", "chr1", 100, 1000, "", ReportFor.AllVariants)
    };

    var saDataSources = new ISaDataSource[4];
    saDataSources[0] = new SaDataSource("data1", "data1", "A", false, true, "acd", new[] { "\"id\":\"123\"" });
    saDataSources[1] = new SaDataSource("data2", "data2", "T", false, true, "acd", new[] { "\"id\":\"123\"" });
    saDataSources[2] = new SaDataSource("data3", "data3", "A", false, false, "acd", new[] { "\"id\":\"123\"" });
    saDataSources[3] = new SaDataSource("data4", "data4", "T", false, false, "acd", new[] { "\"id\":\"123\"" });

    var saPos = new SaPosition(saDataSources, "A");

    // NOTE(review): the trailing 'true' is presumably a leave-open flag (the streams are re-read below) - confirm against SaWriter.
    using (var saWriter = new SaWriter(saMs, indexMs, header, smallIntervals, svIntervals, allIntervals, new List<(int, string)>(), true))
    {
        saWriter.Write(saPos, 150);
    }

    // Rewind both streams so the reader starts from the beginning.
    saMs.Position = 0;
    indexMs.Position = 0;

    ISaPosition observedPosition, observedPosition2;
    using (var saReader = new SaReader(saMs, indexMs))
    {
        observedPosition = saReader.GetAnnotation(150);
        observedPosition2 = saReader.GetAnnotation(200);
    }

    Assert.Equal("A", observedPosition.GlobalMajorAllele);
    Assert.Equal(4, observedPosition.DataSources.Length);
    Assert.Null(observedPosition2);
}
/// <summary>
/// Writes every annotated gene obtained from <paramref name="geneReaders"/> into the gene-level
/// annotation database file located in <paramref name="outputDirectory"/>.
/// </summary>
/// <param name="geneReaders">Readers handed to <c>GetAnnotatedGenes</c> to produce the genes to write.</param>
/// <param name="geneHeaders">Headers whose data source versions are recorded in the database header.</param>
/// <param name="outputDirectory">Directory in which the gene annotation database file is created.</param>
/// <param name="assembly">Genome assembly recorded in the database header.</param>
private static void MergeGene(IReadOnlyList<GeneTsvReader> geneReaders, IEnumerable<SaHeader> geneHeaders, string outputDirectory, GenomeAssembly assembly)
{
    var geneAnnotationDatabasePath = Path.Combine(outputDirectory, SaDataBaseCommon.GeneLevelAnnotationFileName);

    // Build the header before opening the file so a failure here cannot leave an open handle behind.
    var databaseHeader = new SupplementaryAnnotationHeader("", DateTime.Now.Ticks, SaDataBaseCommon.DataVersion, geneHeaders.Select(x => x.GetDataSourceVersion()), assembly);

    // The stream gets its own using block so it is released even if the writer's constructor throws.
    // Disposing a Stream a second time (should the writer also close it) is harmless.
    using (var geneAnnotationStream = FileUtilities.GetCreateStream(geneAnnotationDatabasePath))
    using (var writer = new GeneDatabaseWriter(geneAnnotationStream, databaseHeader))
    {
        // A null result from GetAnnotatedGenes is treated as "no genes to write".
        foreach (var annotatedGene in GetAnnotatedGenes(geneReaders) ?? Enumerable.Empty<IAnnotatedGene>())
        {
            writer.Write(annotatedGene);
        }
    }
}
/// <summary>
/// Merges the supplementary annotations for one reference sequence into
/// "&lt;UCSC name&gt;.nsa" (plus a ".idx" companion file) in the output directory,
/// then prints a one-line summary: annotation count, interval count, ref-minor count and timing.
/// </summary>
/// <param name="refName">Reference name used to select enumerators and intervals and to look up the UCSC name.</param>
private void MergeChrom(string refName)
{
    var creationBench = new Benchmark();

    var saEnumerators = GetSaEnumerators(refName);
    var globalMajorAlleleInRefMinors = GetGlobalMajorAlleleForRefMinors(refName);
    var dataSourceVersions = MergeUtilities.GetDataSourceVersions(_saHeaders);
    var ucscRefName = _refNameToChromosome[refName].UcscName;

    var header = new SupplementaryAnnotationHeader(
        ucscRefName,
        DateTime.Now.Ticks,
        SaDataBaseCommon.DataVersion,
        dataSourceVersions,
        _genomeAssembly);

    // Materialized as a list because it is enumerated several times below.
    var sortedIntervals = MergeUtilities.GetIntervals(_intervalReaders, refName)
        .OrderBy(interval => interval.Start)
        .ThenBy(interval => interval.End)
        .ToList();

    var svIntervals = MergeUtilities.GetSpecificIntervals(ReportFor.StructuralVariants, sortedIntervals);
    var allVariantsIntervals = MergeUtilities.GetSpecificIntervals(ReportFor.AllVariants, sortedIntervals);
    var smallVariantIntervals = MergeUtilities.GetSpecificIntervals(ReportFor.SmallVariants, sortedIntervals);

    var saPath = Path.Combine(_outputDirectory, $"{ucscRefName}.nsa");
    var indexPath = saPath + ".idx";

    var annotationCount = 0;
    int refMinorCount;

    using (var saStream = FileUtilities.GetCreateStream(saPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var saWriter = new SaWriter(saStream, indexStream, header, smallVariantIntervals, svIntervals, allVariantsIntervals, globalMajorAlleleInRefMinors))
    {
        // Keep pulling positions until a null position is returned.
        while (true)
        {
            (int position, ISaPosition saPosition) = GetNextInterimPosition(saEnumerators);
            if (saPosition == null)
            {
                break;
            }

            saWriter.Write(saPosition, position);
            annotationCount++;
        }

        // Read while the writer is still alive.
        refMinorCount = saWriter.RefMinorCount;
    }

    var elapsedSummary = creationBench.GetElapsedIterationTime(annotationCount, "variants", out double _);
    Console.WriteLine($"{ucscRefName,-23} {annotationCount,10:n0} {sortedIntervals.Count,6:n0} {refMinorCount,6:n0} {elapsedSummary}");
}