/// <summary>
/// Adds two dbSNP records that share a start position but delete a different number of
/// bases (1 vs. 27) and checks that each rsID is stored under its own allele key.
/// </summary>
public void MergeDbSnpItems()
{
    const string vcfLine1 = "1 10228 rs143255646 TA T . . RS=143255646;RSPOS=10229;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP";
    const string vcfLine2 = "1 10228 rs200462216 TAACCCCTAACCCTAACCCTAAACCCTA T . . RS=200462216;RSPOS=10229;dbSNPBuildID=137;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP";

    var sa = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(10229));

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem1 = dbsnpReader.ExtractItem(vcfLine1)[0];
    var dbSnpItem2 = dbsnpReader.ExtractItem(vcfLine2)[0];

    // the items returned by the first pass are fed through the creator a second time
    var additionalItems = new List<SupplementaryDataItem>
    {
        dbSnpItem1.SetSupplementaryAnnotations(sa),
        dbSnpItem2.SetSupplementaryAnnotations(sa)
    };

    foreach (var item in additionalItems)
    {
        item.SetSupplementaryAnnotations(sa);
    }

    var dbSnpIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp);

    var dbSnp1 = sa.SaPosition.AlleleSpecificAnnotations["1"].Annotations[dbSnpIndex] as DbSnpAnnotation;
    Assert.NotNull(dbSnp1);

    var dbSnp27 = sa.SaPosition.AlleleSpecificAnnotations["27"].Annotations[dbSnpIndex] as DbSnpAnnotation;
    Assert.NotNull(dbSnp27);

    // xUnit convention: expected value first, observed value second
    Assert.Equal(new List<long> { 143255646 }, dbSnp1.DbSnp);
    Assert.Equal(new List<long> { 200462216 }, dbSnp27.DbSnp);
}
/// <summary>
/// Extracts a dbSNP item whose CAF field is ".,0.7424" and checks the alternate allele
/// together with the alt/ref allele frequencies reported on the item.
/// </summary>
public void NoMinorAllele()
{
    const string vcfLine = "17 828 rs62053745 T C . . RS=62053745;RSPOS=828;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x050100080005140136000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=.,0.7424;COMMON=1";

    var reader = new DbSnpReader(null, _refChromDict);
    var firstItem = reader.ExtractItem(vcfLine)[0];

    Assert.Equal("C", firstItem.AlternateAllele);
    Assert.Equal(0.7424, firstItem.AltAlleleFreq);

    // the reference frequency is "." in CAF; the item is expected to carry double.MinValue
    Assert.Equal(double.MinValue, firstItem.RefAlleleFreq);
}
/// <summary>
/// Merges a dbSNP item, a COSMIC item and a 1000 Genomes item for the same deletion and
/// checks that all three sources are available on the resulting position.
/// </summary>
public void MergeDbSnpCosmic1Kg()
{
    const string vcfLine1 = "1 10228 rs143255646 TA T . . RS=143255646;RSPOS=10229;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP";
    const string vcfLine2 = "1 10228 . TA T 100 PASS AC=2130;AF=0.425319;AN=5008;NS=2504;DP=103152;EAS_AF=0.3363;AMR_AF=0.3602;AFR_AF=0.4909;EUR_AF=0.4056;SAS_AF=0.4949;AA=|||unknown(NO_COVERAGE)";

    var sa = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(10229));

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem1 = dbsnpReader.ExtractItem(vcfLine1)[0];
    var additionalItems = new List<SupplementaryDataItem>
    {
        dbSnpItem1.SetSupplementaryAnnotations(sa)
    };

    var cosmicItem1 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
        new HashSet<CosmicItem.CosmicStudy> { new CosmicItem.CosmicStudy(null, "carcinoma", "oesophagus") }, null);
    additionalItems.Add(cosmicItem1.SetSupplementaryAnnotations(sa));

    var oneKGenItem = _oneKGenReader.ExtractItems(vcfLine2)[0];
    additionalItems.Add(oneKGenItem.SetSupplementaryAnnotations(sa));

    // the items returned by the first pass are fed through the creator a second time
    foreach (var item in additionalItems)
    {
        item.SetSupplementaryAnnotations(sa);
    }

    var asa = sa.SaPosition.AlleleSpecificAnnotations["1"];
    var dbSnp = asa.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    var oneKg = asa.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;

    Assert.NotNull(oneKg);
    var oneKgAc = oneKg.OneKgAllAc;
    var oneKgAn = oneKg.OneKgAllAn;
    Assert.NotNull(oneKgAc);
    Assert.NotNull(oneKgAn);
    Assert.NotNull(dbSnp);

    // xUnit convention: expected value first, observed value second
    Assert.Equal(new List<long> { 143255646 }, dbSnp.DbSnp);
    Assert.Equal("0.425319", (oneKgAc.Value / (double)oneKgAn.Value).ToString(JsonCommon.FrequencyRoundingFormat));
    Assert.True(sa.SaPosition.ContainsCosmicId("COSM1000"));
}
/// <summary>
/// A dbSNP record with three alternate alleles (A, C, T) must produce three items, in that
/// order, each carrying the record's rsID.
/// </summary>
public void MissingDbsnpId()
{
    const string vcfLine = "X 21505833 rs12395602 G A,C,T . . RS=12395602;RSPOS=21505833;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x05010008000505051f000101;WGT=1;VC=SNV;SLO;INT;ASP;VLD;G5;HD;GNO;KGPhase1";

    var reader = new DbSnpReader(null, _refChromDict);
    var items = reader.ExtractItem(vcfLine);

    var expectedAlleles = new[] { "A", "C", "T" };
    Assert.Equal(expectedAlleles.Length, items.Count);

    for (var i = 0; i < expectedAlleles.Length; i++)
    {
        Assert.Equal(expectedAlleles[i], items[i].AlternateAllele);
        Assert.Equal(12395602, items[i].RsId);
    }
}
/// <summary>
/// Applies a dbSNP, a 1000 Genomes and an EVS item for the same SNV (T&gt;G) to one position
/// and checks the values stored for allele "G" from each source.
/// </summary>
public void MergeDbSnp1KpEvs()
{
    const string vcfLine1 = "1 69428 rs140739101 T G . . RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
    const string vcfLine2 = "1 69428 rs140739101 T G 100 PASS AC=95;AF=0.0189696;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||;VT=SNP;EX_TARGET;EAS_AN=1008;EAS_AC=3;EUR_AN=1006;EUR_AC=50;AFR_AN=1322;AFR_AC=2;AMR_AN=694;AMR_AC=25;SAS_AN=978;SAS_AC=15";
    const string vcfLine3 = "1 69428 rs140739101 T G . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

    var saCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(69428));

    // apply one item per data source: dbSNP, 1000 Genomes, EVS
    new DbSnpReader(_renamer).ExtractItem(vcfLine1)[0].SetSupplementaryAnnotations(saCreator);
    _oneKGenReader.ExtractItems(vcfLine2)[0].SetSupplementaryAnnotations(saCreator);
    new EvsReader(_renamer).ExtractItems(vcfLine3)[0].SetSupplementaryAnnotations(saCreator);

    var alleleG = saCreator.SaPosition.AlleleSpecificAnnotations["G"];
    var dbSnpAnnotation = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    var oneKgAnnotation = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
    var evsAnnotation = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

    Assert.NotNull(dbSnpAnnotation);
    Assert.NotNull(oneKgAnnotation);
    Assert.NotNull(evsAnnotation);

    var eurAlleleCount = oneKgAnnotation.OneKgEurAc;
    var eurAlleleNumber = oneKgAnnotation.OneKgEurAn;
    Assert.NotNull(eurAlleleCount);
    Assert.NotNull(eurAlleleNumber);

    Assert.Equal(new List<long> { 140739101 }, dbSnpAnnotation.DbSnp);

    // European frequency is derived from EUR_AC / EUR_AN
    var eurFrequency = eurAlleleCount.Value / (double)eurAlleleNumber.Value;
    Assert.Equal("0.049702", eurFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

    Assert.Equal("0.045707", evsAnnotation.EvsEur);
    Assert.False(saCreator.SaPosition.IsRefMinorAllele);
}
/// <summary>
/// A dbSNP record with three alternate alleles (A, C, T) must produce three items, in that
/// order, each carrying the record's rsID. The reader is backed by a test sequence provider.
/// </summary>
public void MissingDbsnpId()
{
    const string vcfLine = "X 21505833 rs12395602 G A,C,T . . RS=12395602;RSPOS=21505833;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x05010008000505051f000101;WGT=1;VC=SNV;SLO;INT;ASP;VLD;G5;HD;GNO;KGPhase1";

    var provider = ParserTestUtils.GetSequenceProvider(21505833, "G", 'G', ChromosomeUtilities.RefNameToChromosome);
    var reader = new DbSnpReader(null, provider);
    var items = reader.ExtractItem(vcfLine).ToList();

    var expectedAlleles = new[] { "A", "C", "T" };
    Assert.Equal(expectedAlleles.Length, items.Count);

    for (var i = 0; i < expectedAlleles.Length; i++)
    {
        Assert.Equal(expectedAlleles[i], items[i].AltAllele);
        Assert.Equal(12395602, items[i].RsId);
    }
}
/// <summary>
/// For a record with CAF=0.4,0.4,0.2 (reference A, alternates G and T), the finalized position
/// is expected to report A as the global major allele and G as the global minor allele.
/// </summary>
public void RefGlobalMajor()
{
    // NIR-942
    const string vcfLine = "1 1242707 rs2274262 A G,T . . RS=2274262;RSPOS=1242707;RV;dbSNPBuildID=100;SSR=0;SAO=0;VP=0x0501004a000507013e000100;WGT=1;VC=SNV;SLO;U5;INT;R5;ASP;VLD;G5A;G5;GNO;KGPhase1;KGPhase3;CAF=0.4,0.4,0.2;COMMON=1";

    var firstItem = new DbSnpReader(_renamer).ExtractItem(vcfLine)[0];

    var position = new SupplementaryAnnotationPosition(1242707);
    var creator = new SupplementaryPositionCreator(position);

    // only the first extracted item (allele G) is applied before finalizing
    firstItem.SetSupplementaryAnnotations(creator);
    creator.FinalizePositionalAnnotations();

    Assert.Equal("A", position.GlobalMajorAllele);
    Assert.Equal("G", position.GlobalMinorAllele);
}
/// <summary>
/// For a record with CAF=0,1 the zero-frequency reference allele must not be reported:
/// T is the global major allele with frequency "1" and no global minor allele is set.
/// </summary>
public void DisregardZeroFreq()
{
    const string vcfLine = "1 241369 rs11490246 C T . . RS=11490246;RSPOS=241369;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x050100000005000126000100;WGT=1;VC=SNV;SLO;ASP;GNO;KGPhase3;CAF=0,1;COMMON=0";

    var firstItem = new DbSnpReader(_renamer).ExtractItem(vcfLine)[0];

    // NOTE(review): the position is created at 828 although the VCF record is at 241369 - confirm this is intentional
    var position = new SupplementaryAnnotationPosition(828);
    var creator = new SupplementaryPositionCreator(position);

    firstItem.SetSupplementaryAnnotations(creator);
    creator.FinalizePositionalAnnotations();

    Assert.Equal("T", position.GlobalMajorAllele);
    Assert.Equal("1", position.GlobalMajorAlleleFrequency);

    Assert.Null(position.GlobalMinorAllele);
    Assert.Null(position.GlobalMinorAlleleFrequency);
}
/// <summary>
/// Applies every item extracted from a three-allele dbSNP record to one position and checks
/// that alleles A, C and T each carry a dbSNP annotation with the record's rsID.
/// </summary>
public void MissingDbsnpId()
{
    // refactorSA: the annotation for allele C was reported missing in the database; still to be debugged.
    const string vcfLine = "X 21505833 rs12395602 G A,C,T . . RS=12395602;RSPOS=21505833;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x05010008000505051f000101;WGT=1;VC=SNV;SLO;INT;ASP;VLD;G5;HD;GNO;KGPhase1";

    var items = new DbSnpReader(_renamer).ExtractItem(vcfLine);

    var position = new SupplementaryAnnotationPosition(21505833);
    var creator = new SupplementaryPositionCreator(position);

    foreach (var item in items)
    {
        item.SetSupplementaryAnnotations(creator);
    }

    creator.FinalizePositionalAnnotations();

    var annotationA = position.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    Assert.NotNull(annotationA);

    var annotationC = position.AlleleSpecificAnnotations["C"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    Assert.NotNull(annotationC);

    var annotationT = position.AlleleSpecificAnnotations["T"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    Assert.NotNull(annotationT);

    // all three alleles share the single rsID of the record
    Assert.Equal(12395602, annotationA.DbSnp[0]);
    Assert.Equal(12395602, annotationC.DbSnp[0]);
    Assert.Equal(12395602, annotationT.DbSnp[0]);
}
/// <summary>
/// For a record with CAF=0.7424,. (no frequency for the alternate allele) the finalized
/// position must report T as global major allele with frequency "0.7424" and no minor allele.
/// </summary>
public void NoMinorAllele1()
{
    const string vcfLine = "17 828 rs62053745 T C . . RS=62053745;RSPOS=828;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x050100080005140136000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.7424,.;COMMON=1";

    var firstItem = new DbSnpReader(_renamer).ExtractItem(vcfLine)[0];

    var position = new SupplementaryAnnotationPosition(828);
    var creator = new SupplementaryPositionCreator(position);

    firstItem.SetSupplementaryAnnotations(creator);
    creator.FinalizePositionalAnnotations();

    Assert.Equal("T", position.GlobalMajorAllele);
    Assert.Equal("0.7424", position.GlobalMajorAlleleFrequency);

    Assert.Null(position.GlobalMinorAllele);
    Assert.Null(position.GlobalMinorAlleleFrequency);
}
/// <summary>
/// Merges a dbSNP item with two COSMIC items that share the same COSMIC ID and checks that
/// the dbSNP rsID and the COSMIC ID are both present on the resulting position.
/// </summary>
public void MergeDbSnpCosmic()
{
    const string vcfLine1 = "1 10228 rs143255646 TA T . . RS=143255646;RSPOS=10229;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP";

    var sa = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(10229));

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem1 = dbsnpReader.ExtractItem(vcfLine1)[0];
    var additionalItems = new List<SupplementaryDataItem>
    {
        dbSnpItem1.SetSupplementaryAnnotations(sa)
    };

    // two COSMIC entries with the same ID but different studies
    var cosmicItem1 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
        new HashSet<CosmicItem.CosmicStudy> { new CosmicItem.CosmicStudy(null, "carcinoma", "oesophagus") }, null);
    var cosmicItem2 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
        new HashSet<CosmicItem.CosmicStudy> { new CosmicItem.CosmicStudy("01", "carcinoma", "large_intestine") }, null);

    additionalItems.Add(cosmicItem1.SetSupplementaryAnnotations(sa));
    additionalItems.Add(cosmicItem2.SetSupplementaryAnnotations(sa));

    // the items returned by the first pass are fed through the creator a second time
    foreach (var item in additionalItems)
    {
        item.SetSupplementaryAnnotations(sa);
    }

    var dbSnpAnnotation = sa.SaPosition.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    Assert.NotNull(dbSnpAnnotation);

    // xUnit convention: expected value first, observed value second
    Assert.Equal(new List<long> { 143255646 }, dbSnpAnnotation.DbSnp);
    Assert.True(sa.SaPosition.ContainsCosmicId("COSM1000"));
}
/// <summary>
/// Round-trips a position carrying dbSNP and COSMIC data through
/// <c>SupplementaryAnnotationWriter</c> / <c>SupplementaryAnnotationReader</c> and checks that
/// the dbSNP IDs and the COSMIC item read back match what was written.
/// </summary>
public void ReadWriteDbSnpCosmic()
{
    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    // create our expected data source versions
    var dbSnpVersion = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
    var cosmicVersion = new DataSourceVersion("COSMIC", "GRCh37_v71", DateTime.Parse("2014-10-21").Ticks);
    var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, cosmicVersion };

    // create our expected supplementary annotations
    const string vcfLine1 = "1 10228 rs143255646 TA T . . RS=143255646;RSPOS=10229;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP";

    var sa = new SupplementaryAnnotationPosition(10229);
    var saCreator = new SupplementaryPositionCreator(sa);

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem1 = dbsnpReader.ExtractItem(vcfLine1)[0];
    var additionalItems = new List<SupplementaryDataItem>
    {
        dbSnpItem1.SetSupplementaryAnnotations(saCreator)
    };

    var cosmicItem1 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
        new HashSet<CosmicItem.CosmicStudy> { new CosmicItem.CosmicStudy("", "carcinoma", "oesophagus") }, null);
    var cosmicItem2 = new CosmicItem("1", 10229, "COSM1000", "TA", "T", "TP53",
        new HashSet<CosmicItem.CosmicStudy> { new CosmicItem.CosmicStudy("01", "carcinoma", "large_intestine") }, null);

    additionalItems.Add(cosmicItem1.SetSupplementaryAnnotations(saCreator));
    additionalItems.Add(cosmicItem2.SetSupplementaryAnnotations(saCreator));

    // the items returned by the first pass are fed through the creator a second time
    foreach (var item in additionalItems)
    {
        item.SetSupplementaryAnnotations(saCreator);
    }

    // both COSMIC items share one ID and are expected to collapse into a single entry
    Assert.Equal(1, sa.CosmicItems.Count);

    // the preceding code has been unit tested in MergeDbSnpCosmic()

    try
    {
        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            // extract the annotation stored at the written position
            var observedAnnotation1 = reader.GetAnnotation(10229) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation1);

            var expDbSnp = sa.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
            Assert.NotNull(expDbSnp);

            var obsDbSnp = observedAnnotation1.AlleleSpecificAnnotations["1"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
            Assert.NotNull(obsDbSnp);

            Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);
            Assert.True(observedAnnotation1.ContainsCosmicId(sa.CosmicItems[0].ID));
            Assert.Equal(1, observedAnnotation1.CosmicItems.Count);
        }
    }
    finally
    {
        // always remove the temporary files, even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}
/// <summary>
/// Round-trips a position carrying dbSNP and ExAC data through
/// <c>SupplementaryAnnotationWriter</c> / <c>SupplementaryAnnotationReader</c> and checks that
/// the values read back for alleles "A" and "C" match what was written.
/// </summary>
public void ReadWriteExacDbsnp()
{
    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    // create our expected data source versions
    var dbSnpVersion = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
    var cosmicVersion = new DataSourceVersion("COSMIC", "GRCh37_v71", DateTime.Parse("2014-10-21").Ticks);
    var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, cosmicVersion };

    // create our expected supplementary annotations
    const string vcfLine1 = "2 48010488 rs1042821 G A . . RS=1042821;RSPOS=48010488;RV;dbSNPBuildID=86;SSR=0;SAO=1;VP=0x050168420a05150136100100;GENEINFO=MSH6:2956;WGT=1;VC=SNV;PM;PMC;SLO;NSM;REF;U5;R5;ASP;VLD;G5;GNO;KGPhase1;KGPhase3;LSD;CAF=0.7991,0.2009;COMMON=1";
    const string vcfLine2 = "2 48010488 rs1042821 G A,C 14068898.15 PASS AC=21019,1;AC_AFR=1700,0;AC_AMR=1015,1;AC_Adj=19510,1;AC_EAS=1973,0;AC_FIN=743,0;AC_Het=15722,1,0;AC_Hom=1894,0;AC_NFE=10593,0;AC_OTH=147,0;AC_SAS=3339,0;AF=0.178,8.487e-06;AN=117830;AN_AFR=6388;AN_AMR=9014;AN_Adj=91130;AN_EAS=6792;AN_FIN=5078;AN_NFE=48404;AN_OTH=664;AN_SAS=14790;BaseQRankSum=-4.850e-01;ClippingRankSum=-1.400e-01;DB;DP=1206681;FS=0.000;GQ_MEAN=129.86;GQ_STDDEV=221.88;Het_AFR=1322,0,0;Het_AMR=931,1,0;Het_EAS=1511,0,0;Het_FIN=665,0,0;Het_NFE=8585,0,0;Het_OTH=111,0,0;Het_SAS=2597,0,0;Hom_AFR=189,0;Hom_AMR=42,0;Hom_EAS=231,0;Hom_FIN=39,0;Hom_NFE=1004,0;Hom_OTH=18,0;Hom_SAS=371,0;InbreedingCoeff=0.0376;MQ=60.00;MQ0=0;MQRankSum=0.00;NCC=3737;POSITIVE_TRAIN_SITE;QD=17.46;ReadPosRankSum=0.181;VQSLOD=5.87;culprit=MQ;DP_HIST=3051|9435|11318|5521|9711|11342|4131|1270|615|404|328|266|264|262|196|186|126|115|97|277,133|968|2180|3402|3564|2815|1772|954|551|389|321|263|261|261|196|186|126|115|97|277,0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=949|2966|347|492|15135|1435|1335|854|421|526|590|416|13672|1951|445|462|255|174|211|16279,24|79|81|124|135|96|110|118|97|180|228|137|182|191|126|171|180|151|192|16229,0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1";

    var sa = new SupplementaryAnnotationPosition(48010488);
    var saCreator = new SupplementaryPositionCreator(sa);

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem1 = dbsnpReader.ExtractItem(vcfLine1)[0];
    dbSnpItem1.SetSupplementaryAnnotations(saCreator);

    var exacReader = new ExacReader(_renamer);
    foreach (var exacItem in exacReader.ExtractItems(vcfLine2))
    {
        exacItem.SetSupplementaryAnnotations(saCreator);
    }

    try
    {
        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr2", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            // extract the annotation stored at the written position
            var observedAnnotation1 = reader.GetAnnotation(48010488) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation1);

            var dbSnpIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp);
            var exacIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac);

            var expDbSnpA = sa.AlleleSpecificAnnotations["A"].Annotations[dbSnpIndex] as DbSnpAnnotation;
            var obsDbSnpA = observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[dbSnpIndex] as DbSnpAnnotation;
            Assert.NotNull(expDbSnpA);
            Assert.NotNull(obsDbSnpA);

            // we want to make sure we are reading the values we have written
            Assert.Equal(expDbSnpA.DbSnp, obsDbSnpA.DbSnp);

            var expExacA = sa.AlleleSpecificAnnotations["A"].Annotations[exacIndex] as ExacAnnotation;
            var obsExacA = observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[exacIndex] as ExacAnnotation;
            Assert.NotNull(expExacA);
            Assert.NotNull(obsExacA);

            Assert.Equal(expExacA.ExacAllAn, obsExacA.ExacAllAn);
            Assert.Equal(expExacA.ExacCoverage, obsExacA.ExacCoverage);
            AssertSameExacAlleleCounts(expExacA, obsExacA);

            var expExacC = sa.AlleleSpecificAnnotations["C"].Annotations[exacIndex] as ExacAnnotation;
            var obsExacC = observedAnnotation1.AlleleSpecificAnnotations["C"].Annotations[exacIndex] as ExacAnnotation;
            Assert.NotNull(expExacC);
            Assert.NotNull(obsExacC);

            Assert.Equal(expExacC.ExacCoverage, obsExacC.ExacCoverage);
            AssertSameExacAlleleCounts(expExacC, obsExacC);
        }
    }
    finally
    {
        // always remove the temporary files, even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}

/// <summary>
/// Asserts that the overall and per-population ExAC allele counts of two annotations agree.
/// </summary>
private static void AssertSameExacAlleleCounts(ExacAnnotation expected, ExacAnnotation observed)
{
    Assert.Equal(Convert.ToDouble(expected.ExacAllAc), Convert.ToDouble(observed.ExacAllAc));
    Assert.Equal(Convert.ToDouble(expected.ExacAfrAc), Convert.ToDouble(observed.ExacAfrAc));
    Assert.Equal(Convert.ToDouble(expected.ExacAmrAc), Convert.ToDouble(observed.ExacAmrAc));
    Assert.Equal(Convert.ToDouble(expected.ExacEasAc), Convert.ToDouble(observed.ExacEasAc));
    Assert.Equal(Convert.ToDouble(expected.ExacFinAc), Convert.ToDouble(observed.ExacFinAc));
    Assert.Equal(Convert.ToDouble(expected.ExacNfeAc), Convert.ToDouble(observed.ExacNfeAc));
    Assert.Equal(Convert.ToDouble(expected.ExacOthAc), Convert.ToDouble(observed.ExacOthAc));
    Assert.Equal(Convert.ToDouble(expected.ExacSasAc), Convert.ToDouble(observed.ExacSasAc));
}
/// <summary>
/// Builds a position from dbSNP, 1000 Genomes and EVS items for a multi-allelic indel,
/// writes it to a supplementary annotation file and checks that the header versions and the
/// per-allele values ("iG" insertion, "1" deletion) read back match what was written.
/// </summary>
public void MultiAlleleMergeDbSnp1KpEvsSaRw()
{
    // create our expected data source versions
    var dbSnpVersion = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
    var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
    var evsDataSource = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);
    var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, oneKGenVersion, evsDataSource };

    // create our expected supplementary annotations
    const string vcfLine1 = "1 1564952 rs112177324 TG T . . RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
    const string vcfLine2 = "1 1564952 rs112177324 TG TGG,T 100 PASS AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
    const string vcfLine3 = "1 1564952 rs112177324 TG TGG,T . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

    var sa = new SupplementaryAnnotationPosition(1564953);
    var saCreator = new SupplementaryPositionCreator(sa);

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem = dbsnpReader.ExtractItem(vcfLine1)[0];
    var additionalItems = new List<SupplementaryDataItem>
    {
        dbSnpItem.SetSupplementaryAnnotations(saCreator)
    };

    var oneKGenReader = new OneKGenReader(_renamer);
    var oneKGenItem = oneKGenReader.ExtractItems(vcfLine2)[0];
    additionalItems.Add(oneKGenItem.SetSupplementaryAnnotations(saCreator));

    var evsReader = new EvsReader(_renamer);
    var evsItemsList = evsReader.ExtractItems(vcfLine3);
    foreach (var evsItem in evsItemsList)
    {
        additionalItems.Add(evsItem.SetSupplementaryAnnotations(saCreator));
    }

    // the items returned by the first pass are fed through the creator a second time
    foreach (var item in additionalItems)
    {
        item.SetSupplementaryAnnotations(saCreator);
    }

    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    try
    {
        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            var observedDataSourceVersions = reader.Header.DataSourceVersions;

            // check the data source versions (expected value first, per xUnit convention)
            Assert.Equal(3, observedDataSourceVersions.Count);

            var observedDataSourceVersion = observedDataSourceVersions[0];
            Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
            Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
            Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

            // checking the global alleles on the in-memory position
            Assert.Null(sa.GlobalMajorAllele);
            Assert.Null(sa.GlobalMajorAlleleFrequency);
            Assert.Null(sa.GlobalMinorAllele);
            Assert.Null(sa.GlobalMinorAlleleFrequency);

            // extract the annotation stored at the written position
            var observedAnnotation = reader.GetAnnotation(1564953) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation);

            var oneKgIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg);
            var evsIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);

            var expectedInsOneKgAllAc = ((OneKGenAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[oneKgIndex]).OneKgAllAc;
            var expectedDelHasOneKg = sa.AlleleSpecificAnnotations["1"].HasDataSource(DataSourceCommon.DataSource.OneKg);
            var expectedInsEvsAfr = ((EvsAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[evsIndex]).EvsAfr;
            var expectedInsHasDbSnp = sa.AlleleSpecificAnnotations["iG"].HasDataSource(DataSourceCommon.DataSource.DbSnp);

            var obsAsaIns = observedAnnotation.AlleleSpecificAnnotations["iG"];
            var obsAsaDel = observedAnnotation.AlleleSpecificAnnotations["1"];

            Assert.Equal(expectedInsOneKgAllAc, ((OneKGenAnnotation)obsAsaIns.Annotations[oneKgIndex]).OneKgAllAc);
            Assert.Equal(expectedDelHasOneKg, obsAsaDel.HasDataSource(DataSourceCommon.DataSource.OneKg));
            Assert.Equal(expectedInsEvsAfr, ((EvsAnnotation)obsAsaIns.Annotations[evsIndex]).EvsAfr);
            Assert.Equal(expectedInsHasDbSnp, obsAsaIns.HasDataSource(DataSourceCommon.DataSource.DbSnp));
        }
    }
    finally
    {
        // always remove the temporary files, even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}
/// <summary>
/// Round-trips a position carrying dbSNP, 1000 Genomes and EVS data for one SNV through
/// <c>SupplementaryAnnotationWriter</c> / <c>SupplementaryAnnotationReader</c> and checks the
/// header versions and the per-source values read back for allele "G".
/// </summary>
public void ReadAndWriteDbSnp1KgEvs()
{
    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    // create our expected data source versions
    var dbSnpVersion = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
    var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
    var evsDataSource = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);
    var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, oneKGenVersion, evsDataSource };

    // create our expected supplementary annotations
    const string vcfLine1 = "1 69428 rs140739101 T G . . RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
    const string vcfLine2 = "1 69428 rs140739101 T G 100 PASS AC=95;AF=0.0189696;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||";
    const string vcfLine3 = "1 69428 rs140739101 T G . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";
    const string altAllele = "G";

    var sa = new SupplementaryAnnotationPosition(69428);
    var saCreator = new SupplementaryPositionCreator(sa);

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem = dbsnpReader.ExtractItem(vcfLine1)[0];
    dbSnpItem.SetSupplementaryAnnotations(saCreator);

    var oneKGenReader = new OneKGenReader(_renamer);
    var oneKGenItem = oneKGenReader.ExtractItems(vcfLine2)[0];
    oneKGenItem.SetSupplementaryAnnotations(saCreator);

    var evsReader = new EvsReader(_renamer);
    var evsItem = evsReader.ExtractItems(vcfLine3)[0];
    evsItem.SetSupplementaryAnnotations(saCreator);

    // the preceding code has been unit tested in MergeDbSnp1KpEvs()

    try
    {
        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            var observedDataSourceVersions = reader.Header.DataSourceVersions;

            // check the data source versions (expected value first, per xUnit convention)
            Assert.Equal(3, observedDataSourceVersions.Count);

            var observedDataSourceVersion = observedDataSourceVersions[0];
            Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
            Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
            Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

            // extract the annotation stored at the written position
            var observedAnnotation1 = reader.GetAnnotation(69428) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation1);

            var dbSnpIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp);
            var oneKgIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg);
            var evsIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);

            var expDbSnp = sa.AlleleSpecificAnnotations[altAllele].Annotations[dbSnpIndex] as DbSnpAnnotation;
            Assert.NotNull(expDbSnp);
            var expOneKg = sa.AlleleSpecificAnnotations[altAllele].Annotations[oneKgIndex] as OneKGenAnnotation;
            Assert.NotNull(expOneKg);
            var expEvs = sa.AlleleSpecificAnnotations[altAllele].Annotations[evsIndex] as EvsAnnotation;
            Assert.NotNull(expEvs);

            var obsDbSnp = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[dbSnpIndex] as DbSnpAnnotation;
            Assert.NotNull(obsDbSnp);
            var obsOneKg = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[oneKgIndex] as OneKGenAnnotation;
            Assert.NotNull(obsOneKg);
            var obsEvs = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[evsIndex] as EvsAnnotation;
            Assert.NotNull(obsEvs);

            Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);
            Assert.Equal(expEvs.EvsAll, obsEvs.EvsAll);
            Assert.Equal(expOneKg.OneKgAllAc, obsOneKg.OneKgAllAc);
            Assert.Equal(expEvs.EvsCoverage, obsEvs.EvsCoverage);
            Assert.Equal(expEvs.NumEvsSamples, obsEvs.NumEvsSamples);
        }
    }
    finally
    {
        // always remove the temporary files, even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}
/// <summary>
/// Merges dbSNP, 1000 Genomes and EVS items for a multi-allelic indel (TG&gt;TGG and TG&gt;T) and
/// checks the values stored under the deletion key "1" and the insertion key "iG".
/// </summary>
public void MultiAlleleMergeDbSnp1KpEvs()
{
    const string vcfLine1 = "1 1564952 rs112177324 TG T . . RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
    const string vcfLine2 = "1 1564952 rs112177324 TG TGG,T 100 PASS AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
    const string vcfLine3 = "1 1564952 rs112177324 TG TGG,T . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

    var saCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(1564952));

    // first pass: apply every item and remember what each call hands back
    var returnedItems = new List<SupplementaryDataItem>();

    var dbSnpItem = new DbSnpReader(_renamer).ExtractItem(vcfLine1)[0];
    returnedItems.Add(dbSnpItem.SetSupplementaryAnnotations(saCreator));

    foreach (var oneKgItem in _oneKGenReader.ExtractItems(vcfLine2))
    {
        returnedItems.Add(oneKgItem.SetSupplementaryAnnotations(saCreator));
    }

    var evsItems = new EvsReader(_renamer).ExtractItems(vcfLine3);
    foreach (var evsItem in evsItems)
    {
        returnedItems.Add(evsItem.SetSupplementaryAnnotations(saCreator));
    }

    // second pass: feed the returned items through the creator again
    foreach (var returnedItem in returnedItems)
    {
        returnedItem.SetSupplementaryAnnotations(saCreator);
    }

    // deletion allele ("1")
    var deletion = saCreator.SaPosition.AlleleSpecificAnnotations["1"];
    var deletionDbSnp = deletion.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
    var deletionOneKg = deletion.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
    var deletionEvs = deletion.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

    Assert.NotNull(deletionDbSnp);
    Assert.NotNull(deletionOneKg);
    Assert.NotNull(deletionEvs);

    // insertion allele ("iG")
    var insertion = saCreator.SaPosition.AlleleSpecificAnnotations["iG"];
    var insertionOneKg = insertion.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
    var insertionEvs = insertion.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

    Assert.NotNull(insertionOneKg);
    Assert.NotNull(insertionEvs);

    Assert.Equal(new List<long> { 112177324 }, deletionDbSnp.DbSnp);

    var insertionAc = insertionOneKg.OneKgAllAc;
    var insertionAn = insertionOneKg.OneKgAllAn;
    var deletionAc = deletionOneKg.OneKgAllAc;
    var deletionAn = deletionOneKg.OneKgAllAn;

    Assert.NotNull(insertionAc);
    Assert.NotNull(insertionAn);
    Assert.NotNull(deletionAc);
    Assert.NotNull(deletionAn);

    // 1000 Genomes frequencies are derived from allele count / allele number
    var insertionFrequency = insertionAc.Value / (double)insertionAn.Value;
    var deletionFrequency = deletionAc.Value / (double)deletionAn.Value;
    Assert.Equal("0.002596", insertionFrequency.ToString(JsonCommon.FrequencyRoundingFormat));
    Assert.Equal("0.150559", deletionFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

    Assert.Equal("0.012380", insertionEvs.EvsAfr);
    Assert.Equal("0.000258", insertionEvs.EvsEur);
    Assert.Equal("0.004072", insertionEvs.EvsAll);

    Assert.Equal("0.078503", deletionEvs.EvsAfr);
    Assert.Equal("0.392534", deletionEvs.EvsEur);
    Assert.Equal("0.293732", deletionEvs.EvsAll);
}