// Feeds OneKGenReader a VCF line with two alternate alleles (G,T) and verifies that one item is
// produced per allele, each reporting its own overall and per-population allele frequencies.
public void MultiAltAlleleTest()
{
    const string vcfLine = "1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(15274, "A", 'C', _refChromDict);
    var reader = new OneKGenReader(null, sequenceProvider);

    var parsedItems = reader.ExtractItems(vcfLine).ToList();
    Assert.Equal(2, parsedItems.Count);

    var firstAlleleJson = parsedItems[0].GetJsonString();
    var secondAlleleJson = parsedItems[1].GetJsonString();

    // First alt allele (G).
    // NOTE(review): the expected sub-population values agree with AC/AN to six decimals
    // (e.g. 427/1322 for afrAf) rather than with the rounded *_AF fields - presumably the
    // reader recomputes them; confirm against OneKGenReader.
    Assert.Equal("0.347244", GetAlleleFrequency(firstAlleleJson, "allAf"));
    Assert.Equal("0.322995", GetAlleleFrequency(firstAlleleJson, "afrAf"));
    Assert.Equal("0.275216", GetAlleleFrequency(firstAlleleJson, "amrAf"));
    Assert.Equal("0.481151", GetAlleleFrequency(firstAlleleJson, "easAf"));
    Assert.Equal("0.292247", GetAlleleFrequency(firstAlleleJson, "eurAf"));
    Assert.Equal("0.349693", GetAlleleFrequency(firstAlleleJson, "sasAf"));

    // Second alt allele (T).
    Assert.Equal("0.640974", GetAlleleFrequency(secondAlleleJson, "allAf"));
    Assert.Equal("0.636914", GetAlleleFrequency(secondAlleleJson, "afrAf"));
    Assert.Equal("0.720461", GetAlleleFrequency(secondAlleleJson, "amrAf"));
    Assert.Equal("0.518849", GetAlleleFrequency(secondAlleleJson, "easAf"));
    // double check this one: 0.7077535
    Assert.Equal("0.707753", GetAlleleFrequency(secondAlleleJson, "eurAf"));
    Assert.Equal("0.647239", GetAlleleFrequency(secondAlleleJson, "sasAf"));
}
// A dbSNP line whose CAF field is "0,1" must still yield a first entry carrying alt allele T.
public void DisregardZeroFreq()
{
    const string vcfLine = "1 241369 rs11490246 C T . . RS=11490246;RSPOS=241369;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x050100000005000126000100;WGT=1;VC=SNV;SLO;ASP;GNO;KGPhase3;CAF=0,1;COMMON=0";

    var reader = new DbSnpReader(
        null,
        ParserTestUtils.GetSequenceProvider(241369, "C", 'G', ChromosomeUtilities.RefNameToChromosome));

    var firstEntry = reader.ExtractItem(vcfLine).First();

    Assert.Equal("T", firstEntry.AltAllele);
}
// Reads the COSM18152 VCF/TSV resource pair through MergedCosmicReader and expects exactly one item.
public void CosmicAlleleSpecificIndel()
{
    // 10188320 is presumably the variant position the sequence provider is set up for.
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10188320, "G", 'A', ChromosomeUtilities.RefNameToChromosome);

    var reader = new MergedCosmicReader(
        Resources.TopPath("COSM18152.vcf"),
        Resources.TopPath("COSM18152.tsv"),
        sequenceProvider);

    var cosmicItems = reader.GetItems();

    Assert.Single(cosmicItems);
}
// A dbSNP line whose first CAF value is missing ("CAF=.,0.7424") must still yield a first entry
// carrying alt allele C.
public void NoMinorAllele()
{
    const string vcfLine = "17 828 rs62053745 T C . . RS=62053745;RSPOS=828;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x050100080005140136000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=.,0.7424;COMMON=1";

    var reader = new DbSnpReader(
        null,
        ParserTestUtils.GetSequenceProvider(828, "T", 'G', ChromosomeUtilities.RefNameToChromosome));

    var firstEntry = reader.ExtractItem(vcfLine).First();

    Assert.Equal("C", firstEntry.AltAllele);
}
// Reads the COSM983708 VCF/TSV resource pair through MergedCosmicReader and checks that exactly
// one item comes back and that its JSON reports the reference allele as "-".
public void CosmicAltAllele()
{
    var seqProvider = ParserTestUtils.GetSequenceProvider(6928019, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
    var cosmicReader = new MergedCosmicReader(
        Resources.TopPath("COSM983708.vcf"),
        Resources.TopPath("COSM983708.tsv"),
        seqProvider);

    var items = cosmicReader.GetItems().ToList();

    // Use the generic Assert.Single overload (as the sibling tests do) instead of casting to the
    // non-generic IEnumerable; it returns the sole element, so no separate indexing is needed.
    var item = Assert.Single(items);

    Assert.Contains("\"refAllele\":\"-\"", item.GetJsonString());
}
// Parses a tab-separated deletion record with OneKGenReader and compares the complete JSON
// string of the single resulting item (frequencies, allele numbers and allele counts).
public void MissingSubPopulationFrequencies()
{
    const string vcfLine = "1\t10616\trs376342519\tCCGCCGTTGCAAAGGCGCGCCG\tC\t100\tPASS\tAN=5008;AC=4973;AF=0.993011;AA=;EAS_AN=1008;EAS_AC=999;EAS_AF=0.9911;EUR_AN=1006;EUR_AC=1000;EUR_AF=0.994;AFR_AN=1322;AFR_AC=1308;AFR_AF=0.9894;AMR_AN=694;AMR_AC=691;AMR_AF=0.9957;SAS_AN=978;SAS_AC=975;SAS_AF=0.9969";
    const string expectedJson = "\"allAf\":0.993011,\"afrAf\":0.98941,\"amrAf\":0.995677,\"easAf\":0.991071,\"eurAf\":0.994036,\"sasAf\":0.996933,\"allAn\":5008,\"afrAn\":1322,\"amrAn\":694,\"easAn\":1008,\"eurAn\":1006,\"sasAn\":978,\"allAc\":4973,\"afrAc\":1308,\"amrAc\":691,\"easAc\":999,\"eurAc\":1000,\"sasAc\":975";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10616, "CCGCCGTTGCAAAGGCGCGCCG", 'C', ChromosomeUtilities.RefNameToChromosome);
    var reader = new OneKGenReader(null, sequenceProvider);

    var parsedItems = reader.ExtractItems(vcfLine).ToList();

    Assert.Single(parsedItems);
    Assert.Equal(expectedJson, parsedItems[0].GetJsonString());
}
// Parses a dbSNP line for rs62053745 and checks the numeric rs ID of the first extracted entry.
public void MissingEntry2()
{
    const string vcfLine = "17 828 rs62053745 T C . . RS=62053745;RSPOS=828;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x050100080005140136000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.2576,0.7424;COMMON=1";

    var reader = new DbSnpReader(
        null,
        ParserTestUtils.GetSequenceProvider(828, "T", 'A', _refChromDict));

    var firstEntry = reader.ExtractItem(vcfLine).First();

    Assert.Equal(62053745, firstEntry.RsId);
}
// Parses a dbSNP line for rs11490246 and checks the numeric rs ID of the first extracted entry.
public void MissingEntry()
{
    const string vcfLine = "1 241369 rs11490246 C T . . RS=11490246;RSPOS=241369;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x050000000005000126000100;WGT=1;VC=SNV;ASP;GNO;KGPhase3;CAF=0,1;COMMON=0";

    var reader = new DbSnpReader(
        null,
        ParserTestUtils.GetSequenceProvider(241369, "C", 'A', _refChromDict));

    var firstEntry = reader.ExtractItem(vcfLine).First();

    Assert.Equal(11490246, firstEntry.RsId);
}
// Parses a single-alt insertion record with OneKGenReader and checks the overall and
// per-population allele frequencies in the first item's JSON, plus the absence of an
// "ancestralAllele" entry.
public void AlleleFrequencyTest()
{
    const string vcfLine = "1 10352 rs555500075 T TA 100 PAS AC=2191;AF=0.4375;AN=5008;NS=2504;DP=88915;EAS_AF=0.4306;AMR_AF=0.4107;AFR_AF=0.4788;EUR_AF=0.4264;SAS_AF=0.4192;AA=|||unknown(NO_COVERAGE); VT=INDEL;EAS_AN=1008;EAS_AC=434;EUR_AN=1006;EUR_AC=429;AFR_AN=1322;AFR_AC=633;AMR_AN=694;AMR_AC=285;SAS_AN=978;SAS_AC=410";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10352, "T", 'C', _refChromDict);
    var reader = new OneKGenReader(null, sequenceProvider);

    var firstItem = reader.ExtractItems(vcfLine).First();
    var itemJson = firstItem.GetJsonString();

    Assert.Equal("0.4375", GetAlleleFrequency(itemJson, "allAf"));
    Assert.Equal("0.47882", GetAlleleFrequency(itemJson, "afrAf"));
    Assert.Equal("0.410663", GetAlleleFrequency(itemJson, "amrAf"));
    Assert.Equal("0.430556", GetAlleleFrequency(itemJson, "easAf"));
    Assert.Equal("0.426441", GetAlleleFrequency(itemJson, "eurAf"));
    Assert.Equal("0.419223", GetAlleleFrequency(itemJson, "sasAf"));

    // The AA field in the input holds no usable allele ("|||unknown(NO_COVERAGE)"),
    // so the JSON is expected to omit the ancestral allele altogether.
    Assert.DoesNotContain("ancestralAllele", itemJson);
}
// Parses a long deletion record on chromosome X with OneKGenReader and checks the overall and
// per-population allele frequencies reported in the first item's JSON.
public void PrioritizingSymbolicAllele4Svs()
{
    const string vcfLine = "X 101155257 rs373174489 GTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT G 100 PASS AN=3775;AC=1723;AF=0.456424;AA=;EAS_AN=764;EAS_AC=90;EAS_AF=0.1178;EUR_AN=766;EUR_AC=439;EUR_AF=0.5731;AFR_AN=1003;AFR_AC=839;AFR_AF=0.8365;AMR_AN=524;AMR_AC=180;AMR_AF=0.3435;SAS_AN=718;SAS_AC=175;SAS_AF=0.2437";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(101155257, "GTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT", 'C', _refChromDict);
    var reader = new OneKGenReader(null, sequenceProvider);

    var parsedItems = reader.ExtractItems(vcfLine);
    var firstItemJson = parsedItems.First().GetJsonString();

    // { JSON key, expected value } pairs, checked in this order.
    var expectedFrequencies = new[]
    {
        new[] { "allAf", "0.456424" },
        new[] { "afrAf", "0.836491" },
        new[] { "amrAf", "0.343511" },
        new[] { "easAf", "0.117801" },
        new[] { "eurAf", "0.573107" },
        new[] { "sasAf", "0.243733" }
    };

    foreach (var expectation in expectedFrequencies)
    {
        Assert.Equal(expectation[1], GetAlleleFrequency(firstItemJson, expectation[0]));
    }
}
// A dbSNP line with three alternate alleles (A,C,T) must produce three entries, in that order,
// all carrying the same rs ID.
public void MissingDbsnpId()
{
    const string vcfLine = "X 21505833 rs12395602 G A,C,T . . RS=12395602;RSPOS=21505833;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x05010008000505051f000101;WGT=1;VC=SNV;SLO;INT;ASP;VLD;G5;HD;GNO;KGPhase1";

    var reader = new DbSnpReader(
        null,
        ParserTestUtils.GetSequenceProvider(21505833, "G", 'G', ChromosomeUtilities.RefNameToChromosome));

    var entries = reader.ExtractItem(vcfLine).ToList();
    Assert.Equal(3, entries.Count);

    var expectedAltAlleles = new[] { "A", "C", "T" };
    for (var i = 0; i < expectedAltAlleles.Length; i++)
    {
        Assert.Equal(expectedAltAlleles[i], entries[i].AltAllele);
        Assert.Equal(12395602, entries[i].RsId);
    }
}
// Reads the cosm5428243 VCF/TSV resource pair through MergedCosmicReader and checks the id,
// sites and histologies of the first two studies attached to the first item.
public void TwoStudyCosmicCoding()
{
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(35416, "A", 'C', ChromosomeUtilities.RefNameToChromosome);
    var reader = new MergedCosmicReader(
        Resources.TopPath("cosm5428243.vcf"),
        Resources.TopPath("cosm5428243.tsv"),
        sequenceProvider);

    var firstItem = reader.GetItems().ToList()[0];
    var studyList = firstItem.Studies.ToList();

    // NOTE(review): earlier, now-removed commented-out assertions expected two histologies per
    // study ("haematopoietic neoplasm" + "acute myeloid leukaemia", and the underscore-separated
    // equivalents for the second study). They referenced a `study` local that does not exist in
    // this method, so they were stale; confirm that the single-histology expectation is intended.
    var firstStudy = studyList[0];
    Assert.Equal("544", firstStudy.Id);
    Assert.Equal(new[] { "haematopoietic and lymphoid tissue" }, firstStudy.Sites);
    Assert.Equal(new[] { "haematopoietic neoplasm" }, firstStudy.Histologies);

    var secondStudy = studyList[1];
    Assert.Equal("544", secondStudy.Id);
    Assert.Equal(new[] { "haematopoietic;lymphoid tissue" }, secondStudy.Sites);
    Assert.Equal(new[] { "haematopoietic neoplasm" }, secondStudy.Histologies);
}
// Checks that MergedCosmicReader.ExtractCosmicItems keeps the ref allele, alt allele and position
// of a COSMIC VCF line as given, both for a multi-base ref (GGTACTGAC > A) and for a plain SNV.
public void IndelWithNoLeadingBase()
{
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10188320, "GGTACTGAC", 'A', ChromosomeUtilities.RefNameToChromosome);

    // The resource files only serve to construct the reader; what is under test is the
    // parsing of the VCF lines passed in below.
    var reader = new MergedCosmicReader(
        Resources.TopPath("cosm5428243.vcf"),
        Resources.TopPath("cosm5428243.tsv"),
        sequenceProvider);

    const string indelLine = "3 10188320 COSM14426 GGTACTGAC A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2";
    const string snvLine = "3 10188320 COSM18152 G A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7";

    var indelItems = reader.ExtractCosmicItems(indelLine);
    Assert.Equal("GGTACTGAC", indelItems[0].RefAllele);
    Assert.Equal("A", indelItems[0].AltAllele);
    Assert.Equal(10188320, indelItems[0].Position);

    var snvItems = reader.ExtractCosmicItems(snvLine);
    Assert.Equal("G", snvItems[0].RefAllele);
    Assert.Equal("A", snvItems[0].AltAllele);
    Assert.Equal(10188320, snvItems[0].Position);
}