/// <summary>
/// Loads the EVS items of two records at position 1564952 into two separate
/// creators, calls <c>sa1.MergeSaCreator(sa2)</c>, and expects the EVS annotation
/// stored under allele "A" to report <c>HasConflicts</c>.
/// </summary>
public void MergeConflictingEvsItems()
{
    // Both records share the position and both list "A" as an alternate allele.
    const string vcfLine1 = "1 1564952 rs112177324 T G,A . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";
    // Kept on a single physical line: a regular string literal cannot contain a raw newline.
    const string vcfLine2 = "1 1564952 rs140739101 T A . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

    var sa1 = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(1564952));
    var sa2 = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(1564952));

    var evsReader = new EvsReader(_renamer);

    // items of the first record go into sa1
    var evsItems = evsReader.ExtractItems(vcfLine1);
    foreach (var item in evsItems)
    {
        item.SetSupplementaryAnnotations(sa1);
    }

    // the same list is reused for the second record, whose items go into sa2
    evsItems.Clear();
    evsItems.AddRange(evsReader.ExtractItems(vcfLine2));
    foreach (var item in evsItems)
    {
        item?.SetSupplementaryAnnotations(sa2);
    }

    sa1.MergeSaCreator(sa2);

    var evs = sa1.SaPosition.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;
    Assert.NotNull(evs);
    Assert.True(evs.HasConflicts);
}
/// <summary>
/// Feeds one dbSNP, one 1000 Genomes and one EVS record for position 69428 into a
/// single creator, finalizes the positional annotations, and checks the dbSNP id
/// stored for allele "G" as well as the <c>IsRefMinorAllele</c> flag.
/// </summary>
public void MergeDbSnp1KpEvsRefMinor()
{
    const string vcfLine1 = "1 69428 rs140739101 T G . . RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
    // vcf line is modified
    // NOTE(review): presumably AC/AF were raised (AF=0.989617) so that the reference
    // becomes the minor allele - confirm.
    const string vcfLine2 = "1 69428 rs140739101 T G 100 PASS AC=4956;AF=0.989617;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||;VT=SNP;EX_TARGET;EAS_AN=1008;EAS_AC=3;EUR_AN=1006;EUR_AC=50;AFR_AN=1322;AFR_AC=2;AMR_AN=694;AMR_AC=25;SAS_AN=978;SAS_AC=15";
    const string vcfLine3 = "1 69428 rs140739101 T G . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

    var sa = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(69428));

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem = dbsnpReader.ExtractItem(vcfLine1)[0];
    dbSnpItem.SetSupplementaryAnnotations(sa);

    var oneKGenItem = _oneKGenReader.ExtractItems(vcfLine2)[0];
    oneKGenItem.SetSupplementaryAnnotations(sa);

    var evsReader = new EvsReader(_renamer);
    var evsItem = evsReader.ExtractItems(vcfLine3)[0];
    evsItem.SetSupplementaryAnnotations(sa);

    sa.FinalizePositionalAnnotations();

    var dbSnp = sa.SaPosition.AlleleSpecificAnnotations["G"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    Assert.NotNull(dbSnp);
    Assert.Equal(new List<long> { 140739101 }, dbSnp.DbSnp);

    // boolean expectation expressed with Assert.True instead of Assert.Equal(true, ...)
    Assert.True(sa.SaPosition.IsRefMinorAllele);
}
/// <summary>
/// Parses a two-allele EVS record, applies the parsed items (and the items their
/// first application returns) to a creator at position 1564953, and checks that
/// the EVS annotation under allele "iG" reports 5648 samples.
/// </summary>
public void NumEvsSamplesTest()
{
    const string vcfLine = "1 1564952 rs112177324 TG TGG,T . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

    var parsedItems = new EvsReader(_renamer).ExtractItems(vcfLine);

    var sa = new SupplementaryAnnotationPosition(1564953);
    var saCreator = new SupplementaryPositionCreator(sa);

    // first pass: remember whatever each item hands back
    var followUps = new List<SupplementaryDataItem>();
    foreach (var parsed in parsedItems)
    {
        var followUp = parsed.SetSupplementaryAnnotations(saCreator);
        followUps.Add(followUp);
    }

    // second pass: apply the returned items to the same creator
    followUps.ForEach(followUp => followUp.SetSupplementaryAnnotations(saCreator));

    var insertionAnnotations = sa.AlleleSpecificAnnotations["iG"].Annotations;
    var evsIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);
    var evs = insertionAnnotations[evsIndex] as EvsAnnotation;

    Assert.NotNull(evs);
    // GTC=4,5,33,748,1817,3041 -> 4 + 5 + 33 + 748 + 1817 + 3041 = 5648
    Assert.Equal("5648", evs.NumEvsSamples);
}
/// <summary>
/// Puts one EVS, one ExAC and one 1000 Genomes record for position 13382 into
/// three separate creators, merges the latter two into the first, and checks the
/// resulting allele "G" frequencies for all three data sources.
/// </summary>
public void Merge1KgEvsExac()
{
    const string vcfLine1 = "1 13382 rs191719684 C G . PASS DBSNP=dbSNP_135;EA_AC=0,8600;AA_AC=17,4389;TAC=17,12989;MAF=0.0,0.3858,0.1307;GTS=GG,GC,CC;EA_GTC=0,0,4300;AA_GTC=0,17,2186;GTC=0,17,6486;DP=54;GL=SAMD11;CP=0.0;CG=1.5;AA=C;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_152486.2:intron;HGVS_CDNA_VAR=NM_152486.2:c.-30C>G;HGVS_PROTEIN_VAR=.;CDS_SIZES=NM_152486.2:2046;GS=.;PH=.;EA_AGE=.;AA_AGE=24.3+/-50.5";
    const string vcfLine2 = "1 13382 . C G 320.40 VQSRTrancheSNP99.60to99.80 AC=3;AC_AFR=0;AC_AMR=0;AC_Adj=1;AC_EAS=0;AC_FIN=0;AC_Het=1;AC_Hom=0;AC_NFE=0;AC_OTH=0;AC_SAS=1;AF=1.079e-04;AN=27810;AN_AFR=460;AN_AMR=82;AN_Adj=5728;AN_EAS=148;AN_FIN=4;AN_NFE=1400;AN_OTH=60;AN_SAS=3574;BaseQRankSum=-8.880e-01;ClippingRankSum=0.493;DP=86138;FS=0.000;GQ_MEAN=11.35;GQ_STDDEV=12.58;Het_AFR=0;Het_AMR=0;Het_EAS=0;Het_FIN=0;Het_NFE=0;Het_OTH=0;Het_SAS=1;Hom_AFR=0;Hom_AMR=0;Hom_EAS=0;Hom_FIN=0;Hom_NFE=0;Hom_OTH=0;Hom_SAS=0;InbreedingCoeff=-0.0832;MQ=34.49;MQ0=0;MQRankSum=-6.910e-01;NCC=72140;QD=20.03;ReadPosRankSum=-2.073e+00;VQSLOD=-4.106e+00;culprit=MQ;DP_HIST=9135|1821|1658|665|130|135|199|110|41|8|2|1|0|0|0|0|0|0|0|0,1|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=1432|8682|140|118|2625|254|121|17|3|1|10|24|364|94|9|11|0|0|0|0,0|1|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0|0;DOUBLETON_DIST=.;AC_MALE=1;AC_FEMALE=0;AN_MALE=3866;AN_FEMALE=1862;AC_CONSANGUINEOUS=0;AN_CONSANGUINEOUS=684;Hom_CONSANGUINEOUS=0";
    const string vcfLine3 = "1 13382 rs538606945 C G 100 PASS AC=1;AF=0.000199681;AN=5008;NS=2504;DP=28817;EAS_AF=0;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0.001;AA=c|||;VT=SNP";

    // EVS -> first creator
    var evsCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13382));
    var evsReader = new EvsReader(_renamer);
    evsReader.ExtractItems(vcfLine1)[0].SetSupplementaryAnnotations(evsCreator);

    // ExAC -> second creator
    var exacCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13382));
    var exacReader = new ExacReader(_renamer);
    exacReader.ExtractItems(vcfLine2)[0].SetSupplementaryAnnotations(exacCreator);

    // 1000 Genomes -> third creator
    var oneKgCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13382));
    var onekReader = new OneKGenReader(_renamer);
    onekReader.ExtractItems(vcfLine3)[0].SetSupplementaryAnnotations(oneKgCreator);

    // fold everything into the first creator
    evsCreator.MergeSaCreator(exacCreator);
    evsCreator.MergeSaCreator(oneKgCreator);

    var alleleG = evsCreator.SaPosition.AlleleSpecificAnnotations["G"];
    var exac = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
    var oneKg = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
    var evs = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

    Assert.NotNull(exac);
    Assert.NotNull(oneKg);
    Assert.NotNull(evs);

    var oneKgCount = oneKg.OneKgAllAc;
    var oneKgTotal = oneKg.OneKgAllAn;
    var exacCount = exac.ExacAllAc;
    var exacTotal = exac.ExacAllAn;

    Assert.NotNull(oneKgCount);
    Assert.NotNull(oneKgTotal);
    Assert.NotNull(exacCount);
    Assert.NotNull(exacTotal);

    // frequencies are derived as allele count / allele number
    var oneKgFrequency = oneKgCount.Value / (double)oneKgTotal.Value;
    Assert.Equal("0.0002", oneKgFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

    Assert.Equal("0.001307", evs.EvsAll);

    var exacFrequency = exacCount.Value / (double)exacTotal.Value;
    Assert.Equal("0.000175", exacFrequency.ToString(JsonCommon.FrequencyRoundingFormat));
}
/// <summary>
/// Parses a single-allele EVS record and checks the JSON string of the first
/// extracted item, including its <c>coverage</c> field (DP=110).
/// </summary>
public void EvsDepthFieldTest()
{
    const string vcfLine = "1 69428 rs140739101 T G . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";
    const string expectedRes = "\"sampleCount\":5335,\"coverage\":110,\"allAf\":0.030647,\"afrAf\":0.003663,\"eurAf\":0.045707";

    // The reader is only handed an empty stream; the record itself is passed to
    // ExtractItems directly. Dispose the stream reader once the test is done.
    using (var fileInfo = new StreamReader(new MemoryStream()))
    {
        var evsReader = new EvsReader(fileInfo, _refChromDict);
        var evs = evsReader.ExtractItems(vcfLine)[0];

        Assert.NotNull(evs);
        Assert.Equal(expectedRes, evs.GetJsonString());
    }
}
/// <summary>
/// Applies a single-allele EVS record (DP=110) to a creator at position 69428
/// and checks that the EVS annotation under allele "G" reports "110" as its
/// <c>EvsCoverage</c>.
/// </summary>
public void EvsDepthFieldTest()
{
    const string vcfLine = "1 69428 rs140739101 T G . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

    // parse the record and keep its first item
    var firstItem = new EvsReader(_renamer).ExtractItems(vcfLine)[0];

    var sa = new SupplementaryAnnotationPosition(69428);
    var saCreator = new SupplementaryPositionCreator(sa);
    firstItem.SetSupplementaryAnnotations(saCreator);

    // look up the EVS slot of allele "G"
    var annotationsForG = sa.AlleleSpecificAnnotations["G"].Annotations;
    var evsIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);
    var evs = annotationsForG[evsIndex] as EvsAnnotation;

    Assert.NotNull(evs);
    Assert.Equal("110", evs.EvsCoverage);
}
/// <summary>
/// Parses a two-allele EVS record and checks that two items are produced, one
/// per alternate allele ("TGG" and "T"), each with the expected JSON string
/// (sample count 5648, coverage 10, per-population frequencies).
/// </summary>
public void NumEvsSamplesTest()
{
    const string vcfLine = "1 1564952 rs112177324 TG TGG,T . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";
    const string expectedRes1 = "\"sampleCount\":5648,\"coverage\":10,\"allAf\":0.004072,\"afrAf\":0.012380,\"eurAf\":0.000258";
    const string expectedRes2 = "\"sampleCount\":5648,\"coverage\":10,\"allAf\":0.293732,\"afrAf\":0.078503,\"eurAf\":0.392534";

    // The reader is only handed an empty stream; the record itself is passed to
    // ExtractItems directly. Dispose the stream reader once the test is done.
    using (var fileInfo = new StreamReader(new MemoryStream()))
    {
        var evsReader = new EvsReader(fileInfo, _refChromDict);
        var evs = evsReader.ExtractItems(vcfLine);

        Assert.NotNull(evs);
        Assert.Equal(2, evs.Count);

        // first alternate allele
        Assert.Equal(expectedRes1, evs[0].GetJsonString());
        Assert.Equal("TGG", evs[0].AlternateAllele);

        // second alternate allele
        Assert.Equal(expectedRes2, evs[1].GetJsonString());
        Assert.Equal("T", evs[1].AlternateAllele);
    }
}
/// <summary>
/// Builds a multi-allelic position (1564953) from dbSNP, 1000 Genomes and EVS
/// records, writes it to a temporary supplementary annotation file, reads it
/// back, and compares the header's first data source version and selected
/// per-allele values ("iG" and "1") with the in-memory position.
/// </summary>
public void MultiAlleleMergeDbSnp1KpEvsSaRw()
{
    // create our expected data source versions
    var dbSnpVersion = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
    var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
    var evsDataSource = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

    var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, oneKGenVersion, evsDataSource };

    // create our expected supplementary annotations
    const string vcfLine1 = "1 1564952 rs112177324 TG T . . RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
    const string vcfLine2 = "1 1564952 rs112177324 TG TGG,T 100 PASS AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
    // Kept on a single physical line: a regular string literal cannot contain a raw newline.
    const string vcfLine3 = "1 1564952 rs112177324 TG TGG,T . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

    var sa = new SupplementaryAnnotationPosition(1564953);
    var saCreator = new SupplementaryPositionCreator(sa);

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem = dbsnpReader.ExtractItem(vcfLine1)[0];
    var additionalItems = new List<SupplementaryDataItem>
    {
        dbSnpItem.SetSupplementaryAnnotations(saCreator)
    };

    var oneKGenReader = new OneKGenReader(_renamer);
    var oneKGenItem = oneKGenReader.ExtractItems(vcfLine2)[0];
    additionalItems.Add(oneKGenItem.SetSupplementaryAnnotations(saCreator));

    var evsReader = new EvsReader(_renamer);
    var evsItemsList = evsReader.ExtractItems(vcfLine3);
    foreach (var evsItem in evsItemsList)
    {
        additionalItems.Add(evsItem.SetSupplementaryAnnotations(saCreator));
    }

    // apply the items returned by the first pass to the same creator
    foreach (var item in additionalItems)
    {
        item.SetSupplementaryAnnotations(saCreator);
    }

    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    try
    {
        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            var observedDataSourceVersions = reader.Header.DataSourceVersions;

            // check the data source versions (expected value first, per Assert.Equal's signature)
            Assert.Equal(3, observedDataSourceVersions.Count);

            var observedDataSourceVersion = observedDataSourceVersions[0];
            Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
            Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
            Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

            // checking the global alleles
            // NOTE(review): these assertions inspect the in-memory `sa`, not the
            // annotation read back from the file - confirm that is intended.
            Assert.Null(sa.GlobalMajorAllele);
            Assert.Null(sa.GlobalMajorAlleleFrequency);
            Assert.Null(sa.GlobalMinorAllele);
            Assert.Null(sa.GlobalMinorAlleleFrequency);

            // extract the three annotations
            var observedAnnotation = reader.GetAnnotation(1564953) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation);

            var expectedInsOneKgAllAc = ((OneKGenAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc;
            var expectedDelHasOneKg = sa.AlleleSpecificAnnotations["1"].HasDataSource(DataSourceCommon.DataSource.OneKg);
            var expectedInsEvsAfr = ((EvsAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr;
            var expectedInsHasDbSnp = sa.AlleleSpecificAnnotations["iG"].HasDataSource(DataSourceCommon.DataSource.DbSnp);

            var obsAsaIns = observedAnnotation.AlleleSpecificAnnotations["iG"];
            var obsAsaDel = observedAnnotation.AlleleSpecificAnnotations["1"];

            Assert.Equal(expectedInsOneKgAllAc, ((OneKGenAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc);
            Assert.Equal(expectedDelHasOneKg, obsAsaDel.HasDataSource(DataSourceCommon.DataSource.OneKg));
            Assert.Equal(expectedInsEvsAfr, ((EvsAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr);
            Assert.Equal(expectedInsHasDbSnp, obsAsaIns.HasDataSource(DataSourceCommon.DataSource.DbSnp));
        }
    }
    finally
    {
        // remove the temporary files even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}
/// <summary>
/// Builds position 69428 from one dbSNP, one 1000 Genomes and one EVS record,
/// writes it to a temporary supplementary annotation file, reads it back, and
/// compares the header's first data source version and the allele "G"
/// annotations of all three sources with the in-memory position.
/// </summary>
public void ReadAndWriteDbSnp1KgEvs()
{
    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    // create our expected data source versions
    var dbSnpVersion = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
    var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
    var evsDataSource = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

    var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, oneKGenVersion, evsDataSource };

    // create our expected supplementary annotations
    const string vcfLine1 = "1 69428 rs140739101 T G . . RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
    const string vcfLine2 = "1 69428 rs140739101 T G 100 PASS AC=95;AF=0.0189696;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||";
    // Kept on a single physical line: a regular string literal cannot contain a raw newline.
    const string vcfLine3 = "1 69428 rs140739101 T G . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

    const string altAllele = "G";

    var sa = new SupplementaryAnnotationPosition(69428);
    var saCreator = new SupplementaryPositionCreator(sa);

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem = dbsnpReader.ExtractItem(vcfLine1)[0];
    dbSnpItem.SetSupplementaryAnnotations(saCreator);

    var oneKGenReader = new OneKGenReader(_renamer);
    var oneKGenItem = oneKGenReader.ExtractItems(vcfLine2)[0];
    oneKGenItem.SetSupplementaryAnnotations(saCreator);

    var evsReader = new EvsReader(_renamer);
    var evsItem = evsReader.ExtractItems(vcfLine3)[0];
    evsItem.SetSupplementaryAnnotations(saCreator);

    // the preceding code has been unit tested in MergeDbSnp1kpEvs()

    try
    {
        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            var observedDataSourceVersions = reader.Header.DataSourceVersions;

            // check the data source versions (expected value first, per Assert.Equal's signature)
            Assert.Equal(3, observedDataSourceVersions.Count);

            var observedDataSourceVersion = observedDataSourceVersions[0];
            Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
            Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
            Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

            // extract the three annotations
            var observedAnnotation1 = reader.GetAnnotation(69428) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation1);

            // expected values come from the in-memory position
            var expDbSnp = sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
            Assert.NotNull(expDbSnp);
            var expOneKg = sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
            Assert.NotNull(expOneKg);
            var expEvs = sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;
            Assert.NotNull(expEvs);

            // observed values come from the position read back from the file
            var obsDbSnp = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
            Assert.NotNull(obsDbSnp);
            var obsOneKg = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
            Assert.NotNull(obsOneKg);
            var obsEvs = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;
            Assert.NotNull(obsEvs);

            Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);
            Assert.Equal(expEvs.EvsAll, obsEvs.EvsAll);
            Assert.Equal(expOneKg.OneKgAllAc, obsOneKg.OneKgAllAc);
            Assert.Equal(expEvs.EvsCoverage, obsEvs.EvsCoverage);
            Assert.Equal(expEvs.NumEvsSamples, obsEvs.NumEvsSamples);
        }
    }
    finally
    {
        // remove the temporary files even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}
/// <summary>
/// Applies dbSNP, 1000 Genomes and EVS records of a multi-allelic site to one
/// creator (including the items returned by the first application) and checks
/// the dbSNP id plus the 1000 Genomes and EVS frequencies stored under the
/// alleles "1" and "iG".
/// </summary>
public void MultiAlleleMergeDbSnp1KpEvs()
{
    const string vcfLine1 = "1 1564952 rs112177324 TG T . . RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
    const string vcfLine2 = "1 1564952 rs112177324 TG TGG,T 100 PASS AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
    const string vcfLine3 = "1 1564952 rs112177324 TG TGG,T . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

    var sa = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(1564952));

    // first pass: apply every parsed item and collect what each call returns
    var followUps = new List<SupplementaryDataItem>();

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem = dbsnpReader.ExtractItem(vcfLine1)[0];
    followUps.Add(dbSnpItem.SetSupplementaryAnnotations(sa));

    foreach (var oneKitem in _oneKGenReader.ExtractItems(vcfLine2))
    {
        var returned = oneKitem.SetSupplementaryAnnotations(sa);
        followUps.Add(returned);
    }

    var evsReader = new EvsReader(_renamer);
    foreach (var evsItem in evsReader.ExtractItems(vcfLine3))
    {
        var returned = evsItem.SetSupplementaryAnnotations(sa);
        followUps.Add(returned);
    }

    // second pass: apply the collected items to the same creator
    followUps.ForEach(followUp => followUp.SetSupplementaryAnnotations(sa));

    // allele "1"
    var deletionAsa = sa.SaPosition.AlleleSpecificAnnotations["1"];
    var dbSnp1 = deletionAsa.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    var oneKg1 = deletionAsa.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
    var evs1 = deletionAsa.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

    Assert.NotNull(dbSnp1);
    Assert.NotNull(oneKg1);
    Assert.NotNull(evs1);

    // allele "iG"
    var insertionAsa = sa.SaPosition.AlleleSpecificAnnotations["iG"];
    var oneKgiG = insertionAsa.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
    var evsiG = insertionAsa.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

    Assert.NotNull(oneKgiG);
    Assert.NotNull(evsiG);

    Assert.Equal(new List<long> { 112177324 }, dbSnp1.DbSnp);

    var insertionAc = oneKgiG.OneKgAllAc;
    var insertionAn = oneKgiG.OneKgAllAn;
    var deletionAc = oneKg1.OneKgAllAc;
    var deletionAn = oneKg1.OneKgAllAn;

    Assert.NotNull(insertionAc);
    Assert.NotNull(insertionAn);
    Assert.NotNull(deletionAc);
    Assert.NotNull(deletionAn);

    // 1000 Genomes frequencies are derived as allele count / allele number
    var insertionFrequency = insertionAc.Value / (double)insertionAn.Value;
    Assert.Equal("0.002596", insertionFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

    var deletionFrequency = deletionAc.Value / (double)deletionAn.Value;
    Assert.Equal("0.150559", deletionFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

    // EVS frequencies for the insertion
    Assert.Equal("0.012380", evsiG.EvsAfr);
    Assert.Equal("0.000258", evsiG.EvsEur);
    Assert.Equal("0.004072", evsiG.EvsAll);

    // EVS frequencies for the deletion
    Assert.Equal("0.078503", evs1.EvsAfr);
    Assert.Equal("0.392534", evs1.EvsEur);
    Assert.Equal("0.293732", evs1.EvsAll);
}