Ejemplo n.º 1
0
        // Parses a single-alt-allele EVS VCF line, applies the first extracted item to a
        // supplementary annotation position, and checks the EVS frequencies stored for allele "G".
        public void OneAltAlleleTest()
        {
            const string evsVcfLine = "1	69428	rs140739101	T	G	.	PASS	BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

            // only the first item produced by the reader is used
            var firstEvsItem = new EvsReader(_renamer).ExtractItems(evsVcfLine)[0];

            var position        = new SupplementaryAnnotationPosition(69428);
            var positionCreator = new SupplementaryPositionCreator(position);
            firstEvsItem.SetSupplementaryAnnotations(positionCreator);

            // allele counts present in the VCF line above:
            //   EA_AC=313,6535
            //   AA_AC=14,3808
            //   TAC=327,10343
            var alleleAnnotations = position.AlleleSpecificAnnotations["G"].Annotations;
            var evsIndex          = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);
            var evsAnnotation     = alleleAnnotations[evsIndex] as EvsAnnotation;

            Assert.NotNull(evsAnnotation);

            // the expected strings equal the MAF values in the line (4.5707, 0.3663, 3.0647) divided by 100
            Assert.Equal("0.045707", evsAnnotation.EvsEur);
            Assert.Equal("0.003663", evsAnnotation.EvsAfr);
            Assert.Equal("0.030647", evsAnnotation.EvsAll);
        }
Ejemplo n.º 2
0
        // Round-trips ExAC annotations for a multi-allelic site through a supplementary annotation
        // file and checks that the values read back for the alleles keyed "iA" and "1" match the
        // values that were written.
        public void ReadAndWriteExacWithMultipleAlleles()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // create our expected data source versions
                var exacVersion = new DataSourceVersion("ExAC", "0.3.1", DateTime.Parse("2016-03-16").Ticks);

                var expectedDataSourceVersions = new List<DataSourceVersion> {
                    exacVersion
                };

                // create our expected supplementary annotations (original note: AN_adj is modified in this line)
                const string vcfline =
                    "19	3121452	.	TA	T,TAA	17262.47	AC_Adj0_Filter	AC=6,9;AC_AFR=0,0;AC_AMR=0,0;AC_Adj=0,0;AC_EAS=0,0;AC_FIN=0,0;AC_Het=0,0,0;AC_Hom=0,0;AC_NFE=0,0;AC_OTH=0,0;AC_SAS=0,0;AF=4.587e-03,6.881e-03;AN=1308;AN_AFR=0;AN_AMR=0;AN_Adj=3;AN_EAS=0;AN_FIN=0;AN_NFE=0;AN_OTH=0;AN_SAS=0;BaseQRankSum=0.437;DP=2838";

                var sa        = new SupplementaryAnnotationPosition(3121453);
                var saCreator = new SupplementaryPositionCreator(sa);

                var exacReader      = new ExacReader(_renamer);
                var additionalItems = new List<SupplementaryDataItem>();

                // first pass: apply every extracted item and keep whatever item each call returns
                foreach (var exacItem in exacReader.ExtractItems(vcfline))
                {
                    var currentItem = exacItem.SetSupplementaryAnnotations(saCreator);
                    additionalItems.Add(currentItem);
                }

                // second pass: apply the returned items to a fresh position, which is the one written
                var currentSa        = new SupplementaryAnnotationPosition(3121453);
                var currentSaCreator = new SupplementaryPositionCreator(currentSa);

                foreach (var exacItem in additionalItems)
                {
                    exacItem.SetSupplementaryAnnotations(currentSaCreator);
                }

                // write the supplementary annotation file
                using (
                    var writer = new SupplementaryAnnotationWriter(randomPath, "chr19",
                                                                   expectedDataSourceVersions))
                {
                    writer.Write(currentSaCreator, currentSa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation at the position we wrote
                    var observedAnnotation1 = reader.GetAnnotation(3121453) as SupplementaryAnnotationPosition;

                    Assert.NotNull(observedAnnotation1);

                    // we want to make sure we are reading the values we have written (allele "iA")

                    var expExaciA =
                        currentSa.AlleleSpecificAnnotations["iA"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(expExaciA);
                    var obsExaciA =
                        observedAnnotation1.AlleleSpecificAnnotations["iA"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(obsExaciA);

                    Assert.Equal(expExaciA.ExacAllAn, obsExaciA.ExacAllAn);

                    Assert.Equal(expExaciA.ExacCoverage, obsExaciA.ExacCoverage);
                    Assert.Equal(expExaciA.ExacAllAc, obsExaciA.ExacAllAc);
                    Assert.NotNull(obsExaciA.ExacAllAc);
                    Assert.Null(obsExaciA.ExacFinAc);
                    Assert.Null(obsExaciA.ExacFinAn);

                    // we want to make sure we are reading the values we have written (allele "1")

                    var expExac1 =
                        currentSa.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(expExac1);
                    var obsExac1 =
                        observedAnnotation1.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as
                        ExacAnnotation;
                    Assert.NotNull(obsExac1);

                    Assert.Equal(expExac1.ExacAllAn, obsExac1.ExacAllAn);

                    Assert.Equal(expExac1.ExacCoverage, obsExac1.ExacCoverage);
                    Assert.Equal(expExac1.ExacAllAc, obsExac1.ExacAllAc);
                    Assert.NotNull(obsExac1.ExacAllAc);
                    Assert.Null(obsExac1.ExacFinAc);
                    Assert.Null(obsExac1.ExacFinAn);
                }
            }
            finally
            {
                // remove the temporary files even when an assertion above fails;
                // File.Delete does not throw when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 3
0
        // Merges one dbSNP item and two COSMIC items (same COSMIC ID, different studies) into a
        // single position, writes it to a supplementary annotation file, and checks that the dbSNP
        // annotation and the single COSMIC item can be read back.
        public void ReadWriteDbSnpCosmic()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // create our expected data source versions
                var dbSnpVersion  = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
                var cosmicVersion = new DataSourceVersion("COSMIC", "GRCh37_v71", DateTime.Parse("2014-10-21").Ticks);

                var expectedDataSourceVersions = new List<DataSourceVersion> {
                    dbSnpVersion, cosmicVersion
                };

                // create our expected supplementary annotations
                const string vcfLine1 = "1	10228	rs143255646	TA	T	.	.	RS=143255646;RSPOS=10229;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP";

                var sa        = new SupplementaryAnnotationPosition(10229);
                var saCreator = new SupplementaryPositionCreator(sa);

                var dbsnpReader     = new DbSnpReader(_renamer);
                var dbSnpItem1      = dbsnpReader.ExtractItem(vcfLine1)[0];
                var additionalItems = new List<SupplementaryDataItem>
                {
                    dbSnpItem1.SetSupplementaryAnnotations(saCreator)
                };

                var cosmicItem1 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
                                                 new HashSet<CosmicItem.CosmicStudy> {
                    new CosmicItem.CosmicStudy("", "carcinoma", "oesophagus")
                }, null);
                var cosmicItem2 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
                                                 new HashSet<CosmicItem.CosmicStudy> {
                    new CosmicItem.CosmicStudy("01", "carcinoma", "large_intestine")
                }, null);

                additionalItems.Add(cosmicItem1.SetSupplementaryAnnotations(saCreator));
                additionalItems.Add(cosmicItem2.SetSupplementaryAnnotations(saCreator));

                // re-apply the returned items to the same creator; the position is not cleared first
                foreach (var item in additionalItems)
                {
                    item.SetSupplementaryAnnotations(saCreator);
                }

                // both COSMIC items share the ID "COSM1000", and exactly one COSMIC entry is expected
                Assert.Equal(1, sa.CosmicItems.Count);
                // the preceding code has been unit tested in MergeDbSnpCosmic()

                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation at the position we wrote
                    var observedAnnotation1 = reader.GetAnnotation(10229) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);

                    var expDbSnp =
                        sa.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as
                        DbSnpAnnotation;
                    Assert.NotNull(expDbSnp);
                    var obsDbSnp =
                        observedAnnotation1.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as
                        DbSnpAnnotation;
                    Assert.NotNull(obsDbSnp);

                    Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);
                    Assert.True(observedAnnotation1.ContainsCosmicId(sa.CosmicItems[0].ID));
                    Assert.Equal(1, observedAnnotation1.CosmicItems.Count);
                }
            }
            finally
            {
                // remove the temporary files even when an assertion above fails;
                // File.Delete does not throw when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 4
0
        // Applies one dbSNP item and the ExAC items of a two-alt-allele line to the same position,
        // writes it to a supplementary annotation file, and checks that the dbSNP ID and the ExAC
        // allele counts for alleles "A" and "C" are read back unchanged.
        public void ReadWriteExacDbsnp()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // create our expected data source versions
                // NOTE(review): the header lists dbSNP and COSMIC although this test writes dbSNP and
                // ExAC data; the versions are not asserted below - confirm whether ExAC was intended
                var dbSnpVersion  = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
                var cosmicVersion = new DataSourceVersion("COSMIC", "GRCh37_v71", DateTime.Parse("2014-10-21").Ticks);

                var expectedDataSourceVersions = new List<DataSourceVersion> {
                    dbSnpVersion, cosmicVersion
                };

                // create our expected supplementary annotations
                const string vcfLine1 = "2	48010488	rs1042821	G	A	.	.	RS=1042821;RSPOS=48010488;RV;dbSNPBuildID=86;SSR=0;SAO=1;VP=0x050168420a05150136100100;GENEINFO=MSH6:2956;WGT=1;VC=SNV;PM;PMC;SLO;NSM;REF;U5;R5;ASP;VLD;G5;GNO;KGPhase1;KGPhase3;LSD;CAF=0.7991,0.2009;COMMON=1";
                const string vcfLine2 =
                    "2	48010488	rs1042821	G	A,C	14068898.15	PASS	AC=21019,1;AC_AFR=1700,0;AC_AMR=1015,1;AC_Adj=19510,1;AC_EAS=1973,0;AC_FIN=743,0;AC_Het=15722,1,0;AC_Hom=1894,0;AC_NFE=10593,0;AC_OTH=147,0;AC_SAS=3339,0;AF=0.178,8.487e-06;AN=117830;AN_AFR=6388;AN_AMR=9014;AN_Adj=91130;AN_EAS=6792;AN_FIN=5078;AN_NFE=48404;AN_OTH=664;AN_SAS=14790;BaseQRankSum=-4.850e-01;ClippingRankSum=-1.400e-01;DB;DP=1206681;FS=0.000;GQ_MEAN=129.86;GQ_STDDEV=221.88;Het_AFR=1322,0,0;Het_AMR=931,1,0;Het_EAS=1511,0,0;Het_FIN=665,0,0;Het_NFE=8585,0,0;Het_OTH=111,0,0;Het_SAS=2597,0,0;Hom_AFR=189,0;Hom_AMR=42,0;Hom_EAS=231,0;Hom_FIN=39,0;Hom_NFE=1004,0;Hom_OTH=18,0;Hom_SAS=371,0;InbreedingCoeff=0.0376;MQ=60.00;MQ0=0;MQRankSum=0.00;NCC=3737;POSITIVE_TRAIN_SITE;QD=17.46;ReadPosRankSum=0.181;VQSLOD=5.87;culprit=MQ;DP_HIST=3051|9435|11318|5521|9711|11342|4131|1270|615|404|328|266|264|262|196|186|126|115|97|277,133|968|2180|3402|3564|2815|1772|954|551|389|321|263|261|261|196|186|126|115|97|277,0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=949|2966|347|492|15135|1435|1335|854|421|526|590|416|13672|1951|445|462|255|174|211|16279,24|79|81|124|135|96|110|118|97|180|228|137|182|191|126|171|180|151|192|16229,0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1";

                var sa        = new SupplementaryAnnotationPosition(48010488);
                var saCreator = new SupplementaryPositionCreator(sa);

                var dbsnpReader = new DbSnpReader(_renamer);
                var dbSnpItem1  = dbsnpReader.ExtractItem(vcfLine1)[0];

                dbSnpItem1.SetSupplementaryAnnotations(saCreator);

                var exacReader = new ExacReader(_renamer);

                foreach (var exacItem in exacReader.ExtractItems(vcfLine2))
                {
                    exacItem.SetSupplementaryAnnotations(saCreator);
                }

                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr2", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation at the position we wrote
                    var observedAnnotation1 = reader.GetAnnotation(48010488) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);

                    var expDbSnpA =
                        sa.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
                    var obsDbSnpA =
                        observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
                    Assert.NotNull(expDbSnpA);
                    Assert.NotNull(obsDbSnpA);

                    // we want to make sure we are reading the values we have written
                    Assert.Equal(expDbSnpA.DbSnp, obsDbSnpA.DbSnp);

                    // ExAC values for allele "A"
                    var expExacA =
                        sa.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
                    var obsExacA =
                        observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;

                    Assert.NotNull(expExacA);
                    Assert.NotNull(obsExacA);

                    Assert.Equal(expExacA.ExacAllAn, obsExacA.ExacAllAn);
                    Assert.Equal(expExacA.ExacCoverage, obsExacA.ExacCoverage);
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAllAc), Convert.ToDouble(obsExacA.ExacAllAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAfrAc), Convert.ToDouble(obsExacA.ExacAfrAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAmrAc), Convert.ToDouble(obsExacA.ExacAmrAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacEasAc), Convert.ToDouble(obsExacA.ExacEasAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacFinAc), Convert.ToDouble(obsExacA.ExacFinAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacNfeAc), Convert.ToDouble(obsExacA.ExacNfeAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacOthAc), Convert.ToDouble(obsExacA.ExacOthAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacSasAc), Convert.ToDouble(obsExacA.ExacSasAc));

                    // ExAC values for allele "C"
                    var expExacC =
                        sa.AlleleSpecificAnnotations["C"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
                    var obsExacC =
                        observedAnnotation1.AlleleSpecificAnnotations["C"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;

                    Assert.NotNull(expExacC);
                    Assert.NotNull(obsExacC);

                    Assert.Equal(expExacC.ExacCoverage, obsExacC.ExacCoverage);
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAllAc), Convert.ToDouble(obsExacC.ExacAllAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAfrAc), Convert.ToDouble(obsExacC.ExacAfrAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAmrAc), Convert.ToDouble(obsExacC.ExacAmrAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacEasAc), Convert.ToDouble(obsExacC.ExacEasAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacFinAc), Convert.ToDouble(obsExacC.ExacFinAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacNfeAc), Convert.ToDouble(obsExacC.ExacNfeAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacOthAc), Convert.ToDouble(obsExacC.ExacOthAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacSasAc), Convert.ToDouble(obsExacC.ExacSasAc));
                }
            }
            finally
            {
                // remove the temporary files even when an assertion above fails;
                // File.Delete does not throw when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 5
0
        // Merges dbSNP, 1000 Genomes and EVS items for a multi-allelic site into one position, writes
        // it to a supplementary annotation file, and checks the header versions plus selected
        // per-allele values (alleles "iG" and "1") after reading the file back.
        public void MultiAlleleMergeDbSnp1KpEvsSaRw()
        {
            // create our expected data source versions
            var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
            var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

            var expectedDataSourceVersions = new List<DataSourceVersion> {
                dbSnpVersion, oneKGenVersion, evsDataSource
            };

            // create our expected supplementary annotations
            const string vcfLine1 = "1	1564952	rs112177324	TG	T	.	.	RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
            const string vcfLine2 = "1	1564952	rs112177324	TG	TGG,T	100	PASS	AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
            const string vcfLine3 = "1	1564952	rs112177324	TG	TGG,T	.	PASS	BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

            var sa        = new SupplementaryAnnotationPosition(1564953);
            var saCreator = new SupplementaryPositionCreator(sa);

            var dbsnpReader     = new DbSnpReader(_renamer);
            var dbSnpItem       = dbsnpReader.ExtractItem(vcfLine1)[0];
            var additionalItems = new List<SupplementaryDataItem>
            {
                dbSnpItem.SetSupplementaryAnnotations(saCreator)
            };

            var oneKGenReader = new OneKGenReader(_renamer);
            var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];

            additionalItems.Add(oneKGenItem.SetSupplementaryAnnotations(saCreator));

            var evsReader    = new EvsReader(_renamer);
            var evsItemsList = evsReader.ExtractItems(vcfLine3);

            foreach (var evsItem in evsItemsList)
            {
                additionalItems.Add(evsItem.SetSupplementaryAnnotations(saCreator));
            }

            // re-apply the items returned by the first pass to the same creator
            foreach (var item in additionalItems)
            {
                item.SetSupplementaryAnnotations(saCreator);
            }

            // write the supplementary annotation file
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    var observedDataSourceVersions = reader.Header.DataSourceVersions;

                    // check the data source versions (expected value first, per the xUnit convention)
                    Assert.Equal(3, observedDataSourceVersions.Count);

                    var observedDataSourceVersion = observedDataSourceVersions[0];
                    Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
                    Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
                    Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

                    // checking the global alleles on the in-memory position
                    Assert.Null(sa.GlobalMajorAllele);
                    Assert.Null(sa.GlobalMajorAlleleFrequency);
                    Assert.Null(sa.GlobalMinorAllele);
                    Assert.Null(sa.GlobalMinorAlleleFrequency);

                    // extract the annotation at the position we wrote
                    var observedAnnotation = reader.GetAnnotation(1564953) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation);

                    var expectedInsOneKgAllAc = ((OneKGenAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc;
                    var expectedDelHasOneKg   = sa.AlleleSpecificAnnotations["1"].HasDataSource(DataSourceCommon.DataSource.OneKg);

                    var expectedInsEvsAfr = ((EvsAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr;

                    var expectedInsHasDbSnp = sa.AlleleSpecificAnnotations["iG"].HasDataSource(DataSourceCommon.DataSource.DbSnp);

                    var obsAsaIns = observedAnnotation.AlleleSpecificAnnotations["iG"];
                    var obsAsaDel = observedAnnotation.AlleleSpecificAnnotations["1"];

                    Assert.Equal(expectedInsOneKgAllAc, ((OneKGenAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc);
                    Assert.Equal(expectedDelHasOneKg, obsAsaDel.HasDataSource(DataSourceCommon.DataSource.OneKg));

                    Assert.Equal(expectedInsEvsAfr, ((EvsAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr);
                    Assert.Equal(expectedInsHasDbSnp, obsAsaIns.HasDataSource(DataSourceCommon.DataSource.DbSnp));
                }
            }
            finally
            {
                // remove the temporary files even when an assertion above fails;
                // File.Delete does not throw when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 6
0
        // Applies dbSNP, 1000 Genomes and EVS items for a single-alt-allele site to one position,
        // writes it to a supplementary annotation file, and checks the header versions and the
        // per-source annotations for allele "G" after reading the file back.
        public void ReadAndWriteDbSnp1KgEvs()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // create our expected data source versions
                var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
                var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
                var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

                var expectedDataSourceVersions = new List<DataSourceVersion> {
                    dbSnpVersion, oneKGenVersion, evsDataSource
                };

                // create our expected supplementary annotations
                const string vcfLine1 = "1	69428	rs140739101	T	G	.	.	RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
                const string vcfLine2 = "1	69428	rs140739101	T	G	100	PASS	AC=95;AF=0.0189696;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||";
                const string vcfLine3 = "1	69428	rs140739101	T	G	.	PASS	BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

                const string altAllele = "G";
                var          sa        = new SupplementaryAnnotationPosition(69428);
                var          saCreator = new SupplementaryPositionCreator(sa);

                var dbsnpReader = new DbSnpReader(_renamer);
                var dbSnpItem   = dbsnpReader.ExtractItem(vcfLine1)[0];

                dbSnpItem.SetSupplementaryAnnotations(saCreator);

                var oneKGenReader = new OneKGenReader(_renamer);
                var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];

                oneKGenItem.SetSupplementaryAnnotations(saCreator);

                var evsReader = new EvsReader(_renamer);
                var evsItem   = evsReader.ExtractItems(vcfLine3)[0];

                evsItem.SetSupplementaryAnnotations(saCreator);

                // the preceding code has been unit tested in MergeDbSnp1kpEvs()

                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    var observedDataSourceVersions = reader.Header.DataSourceVersions;

                    // check the data source versions (expected value first, per the xUnit convention)
                    Assert.Equal(3, observedDataSourceVersions.Count);

                    var observedDataSourceVersion = observedDataSourceVersions[0];
                    Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
                    Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
                    Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

                    // extract the annotation at the position we wrote
                    var observedAnnotation1 = reader.GetAnnotation(69428) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);

                    var expDbSnp =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)]
                        as DbSnpAnnotation;
                    Assert.NotNull(expDbSnp);

                    var expOneKg =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]
                        as OneKGenAnnotation;
                    Assert.NotNull(expOneKg);

                    var expEvs =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]
                        as EvsAnnotation;
                    Assert.NotNull(expEvs);

                    var obsDbSnp = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)]
                                   as DbSnpAnnotation;
                    Assert.NotNull(obsDbSnp);

                    var obsOneKg = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]
                                   as OneKGenAnnotation;
                    Assert.NotNull(obsOneKg);

                    var obsEvs = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]
                                 as EvsAnnotation;
                    Assert.NotNull(obsEvs);

                    Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);

                    Assert.Equal(expEvs.EvsAll, obsEvs.EvsAll);
                    Assert.Equal(expOneKg.OneKgAllAc, obsOneKg.OneKgAllAc);

                    Assert.Equal(expEvs.EvsCoverage, obsEvs.EvsCoverage);
                    Assert.Equal(expEvs.NumEvsSamples, obsEvs.NumEvsSamples);
                }
            }
            finally
            {
                // remove the temporary files even when an assertion above fails;
                // File.Delete does not throw when the file does not exist
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Ejemplo n.º 7
0
        // Reads the supplementary annotation file at _randomPath and checks the header fields, the
        // dbSNP annotations at positions 100-102 (read in order and then out of order), and the
        // first supplementary interval against the expected fixtures held in the test class fields.
        public void ReadAndWrite()
        {
            // read the supplementary annotation file
            using (var reader = new SupplementaryAnnotationReader(_randomPath))
            {
                var observedDataSourceVersions = reader.Header.DataSourceVersions;
                var refSeq         = reader.Header.ReferenceSequenceName;
                var dataVersion    = reader.Header.DataVersion;
                var creationTime   = reader.Header.CreationTimeTicks;
                var genomeAssembly = reader.Header.GenomeAssembly;

                // check the data source versions (expected value first, per the xUnit convention)
                Assert.Equal(1, observedDataSourceVersions.Count);

                var observedDataSourceVersion = observedDataSourceVersions[0];
                Assert.Equal(_expectedDataSourceVersion.Name, observedDataSourceVersion.Name);
                Assert.Equal(_expectedDataSourceVersion.Version, observedDataSourceVersion.Version);
                Assert.Equal(_expectedDataSourceVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);
                Assert.NotNull(refSeq);
                Assert.Equal(SupplementaryAnnotationCommon.DataVersion, dataVersion);
                // Equal/NotEqual report both values on failure, unlike Assert.True on a comparison
                Assert.NotEqual(DateTime.MinValue.Ticks, creationTime);
                Assert.Equal(GenomeAssembly.Unknown, genomeAssembly);

                var dbSnpIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp);

                var expDbsnp1 =
                    ((DbSnpAnnotation)
                     _expectedAnnotation1.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;
                var expDbsnp2 =
                    ((DbSnpAnnotation)
                     _expectedAnnotation2.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;
                var expDbsnp3 =
                    ((DbSnpAnnotation)
                     _expectedAnnotation3.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;

                // extract the three annotations
                var observedAnnotation1 = reader.GetAnnotation(100) as SupplementaryAnnotationPosition;
                var observedAnnotation2 = reader.GetAnnotation(101) as SupplementaryAnnotationPosition;
                var observedAnnotation3 = reader.GetAnnotation(102) as SupplementaryAnnotationPosition;

                // fail with a clear assertion instead of a NullReferenceException if a lookup or cast fails
                Assert.NotNull(observedAnnotation1);
                Assert.NotNull(observedAnnotation2);
                Assert.NotNull(observedAnnotation3);

                var obsDbsnp1 =
                    ((DbSnpAnnotation)
                     observedAnnotation1.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;
                var obsDbsnp2 =
                    ((DbSnpAnnotation)
                     observedAnnotation2.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;
                var obsDbsnp3 =
                    ((DbSnpAnnotation)
                     observedAnnotation3.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;

                Assert.Equal(expDbsnp1, obsDbsnp1);
                Assert.Equal(expDbsnp2, obsDbsnp2);
                Assert.Equal(expDbsnp3, obsDbsnp3);

                // jump around the file: request the positions out of order (2, 1, 3)
                var observedJumpAnnotation2 = reader.GetAnnotation(_expectedAnnotation2.ReferencePosition) as SupplementaryAnnotationPosition;
                var observedJumpAnnotation1 = reader.GetAnnotation(_expectedAnnotation1.ReferencePosition) as SupplementaryAnnotationPosition;
                var observedJumpAnnotation3 = reader.GetAnnotation(_expectedAnnotation3.ReferencePosition) as SupplementaryAnnotationPosition;

                Assert.NotNull(observedJumpAnnotation1);
                Assert.NotNull(observedJumpAnnotation2);
                Assert.NotNull(observedJumpAnnotation3);

                var obsJumpDbsnp1 =
                    ((DbSnpAnnotation)
                     observedJumpAnnotation1.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;
                var obsJumpDbsnp2 =
                    ((DbSnpAnnotation)
                     observedJumpAnnotation2.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;
                var obsJumpDbsnp3 =
                    ((DbSnpAnnotation)
                     observedJumpAnnotation3.AlleleSpecificAnnotations[AltAllele].Annotations[dbSnpIndex]).DbSnp;

                Assert.Equal(expDbsnp1, obsJumpDbsnp1);
                Assert.Equal(expDbsnp2, obsJumpDbsnp2);
                Assert.Equal(expDbsnp3, obsJumpDbsnp3);

                // only the first interval returned by the reader is compared
                var observedInterval = reader.GetSupplementaryIntervals(_renamer);
                Assert.Equal(_expectedInterval, observedInterval.First());
            }
        }
Ejemplo n.º 8
0
        // Parses one dbSNP, one 1000 Genomes and one EVS record that all describe the site
        // 1:1564952 (REF TG; ALT T and/or TGG), applies them to a single
        // SupplementaryPositionCreator, and checks the annotations stored under the
        // allele keys "1" and "iG".
        public void MultiAlleleMergeDbSnp1KpEvs()
        {
            const string dbSnpVcfLine   = "1	1564952	rs112177324	TG	T	.	.	RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
            const string oneKGenVcfLine = "1	1564952	rs112177324	TG	TGG,T	100	PASS	AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
            const string evsVcfLine     = "1	1564952	rs112177324	TG	TGG,T	.	PASS	BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

            var saCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(1564952));

            // collect whatever each first-pass SetSupplementaryAnnotations call returns;
            // those returned items are applied to the same creator in a second pass below
            var returnedItems = new List<SupplementaryDataItem>();

            // dbSNP: only the first extracted item is used
            var firstDbSnpItem = new DbSnpReader(_renamer).ExtractItem(dbSnpVcfLine)[0];
            returnedItems.Add(firstDbSnpItem.SetSupplementaryAnnotations(saCreator));

            // 1000 Genomes: one item per entry returned by the shared reader
            foreach (var oneKGenItem in _oneKGenReader.ExtractItems(oneKGenVcfLine))
            {
                var returned = oneKGenItem.SetSupplementaryAnnotations(saCreator);
                returnedItems.Add(returned);
            }

            // EVS: same treatment with a freshly built reader
            foreach (var evsItem in new EvsReader(_renamer).ExtractItems(evsVcfLine))
            {
                var returned = evsItem.SetSupplementaryAnnotations(saCreator);
                returnedItems.Add(returned);
            }

            // second pass: apply every returned item to the creator
            for (var i = 0; i < returnedItems.Count; i++)
            {
                returnedItems[i].SetSupplementaryAnnotations(saCreator);
            }

            var alleleAnnotations = saCreator.SaPosition.AlleleSpecificAnnotations;
            var dbSnpIndex        = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp);
            var oneKgIndex        = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg);
            var evsIndex          = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);

            // key "1": expected to carry data from all three sources
            // (its expected 1000 Genomes frequency, 754/5008, is the count listed for ALT "T")
            var deletionEntry = alleleAnnotations["1"];
            var deletionDbSnp = deletionEntry.Annotations[dbSnpIndex] as DbSnpAnnotation;
            var deletionOneKg = deletionEntry.Annotations[oneKgIndex] as OneKGenAnnotation;
            var deletionEvs   = deletionEntry.Annotations[evsIndex] as EvsAnnotation;

            Assert.NotNull(deletionDbSnp);
            Assert.NotNull(deletionOneKg);
            Assert.NotNull(deletionEvs);

            // key "iG": only 1000 Genomes and EVS are checked
            // (its expected 1000 Genomes frequency, 13/5008, is the count listed for ALT "TGG")
            var insertionEntry = alleleAnnotations["iG"];
            var insertionOneKg = insertionEntry.Annotations[oneKgIndex] as OneKGenAnnotation;
            var insertionEvs   = insertionEntry.Annotations[evsIndex] as EvsAnnotation;

            Assert.NotNull(insertionOneKg);
            Assert.NotNull(insertionEvs);

            var expectedRsIds = new List<long> { 112177324 };
            Assert.Equal(expectedRsIds, deletionDbSnp.DbSnp);

            var insertionAc = insertionOneKg.OneKgAllAc;
            var insertionAn = insertionOneKg.OneKgAllAn;
            var deletionAc  = deletionOneKg.OneKgAllAc;
            var deletionAn  = deletionOneKg.OneKgAllAn;

            Assert.NotNull(insertionAc);
            Assert.NotNull(insertionAn);
            Assert.NotNull(deletionAc);
            Assert.NotNull(deletionAn);

            // 1000 Genomes frequency = allele count / allele number, rendered with the JSON rounding format
            var insertionFrequency = insertionAc.Value / (double)insertionAn.Value;
            Assert.Equal("0.002596", insertionFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

            var deletionFrequency = deletionAc.Value / (double)deletionAn.Value;
            Assert.Equal("0.150559", deletionFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

            // EVS expectations equal each allele's count divided by the summed counts
            // listed in AA_AC, EA_AC and TAC of the EVS line
            Assert.Equal("0.012380", insertionEvs.EvsAfr);
            Assert.Equal("0.000258", insertionEvs.EvsEur);
            Assert.Equal("0.004072", insertionEvs.EvsAll);

            Assert.Equal("0.078503", deletionEvs.EvsAfr);
            Assert.Equal("0.392534", deletionEvs.EvsEur);
            Assert.Equal("0.293732", deletionEvs.EvsAll);
        }