// Populates a supplementary annotation position (115256529) with EVS and dbSNP
// entries for allele "C", then checks the INFO column of the VCF line for the
// EVS string through VcfUtilities.FieldContains.
public void EvsOutputTest()
{
    const string vcfLine = "chr1\t115256529\t.\tT\tC\t1000\tPASS\t.\tGT\t0/1";
    const string expectedInfo = "EVS=0.0001|102|3456";

    var position = new SupplementaryAnnotationPosition(115256529);
    var dbSnpEntry = new DbSnpAnnotation { DbSnp = new List<long> { 121913237 } };
    var evsEntry = new EvsAnnotation
    {
        EvsAll = "0.0001",
        EvsCoverage = "102",
        NumEvsSamples = "3456"
    };

    // EVS is registered before dbSNP, same order as before.
    var creator = new SupplementaryPositionCreator(position);
    creator.AddExternalDataToAsa(DataSourceCommon.DataSource.Evs, "C", evsEntry);
    creator.AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, "C", dbSnpEntry);

    var mockReader = new MockSupplementaryAnnotationReader(position);
    VcfUtilities.FieldContains(mockReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Populates a supplementary annotation position (115256529) with 1000 Genomes
// allele counts and a dbSNP id for allele "C", then checks the INFO column of
// the VCF line for the AF1000G string through VcfUtilities.FieldContains.
public void AlleleFrequency1KgOutputTest()
{
    const string vcfLine = "chr1\t115256529\t.\tT\tC\t1000\tPASS\t.\tGT\t0/1";
    const string expectedInfo = "AF1000G=0.425319";

    var position = new SupplementaryAnnotationPosition(115256529);
    var oneKgEntry = new OneKGenAnnotation
    {
        OneKgAllAn = 5008,
        OneKgAllAc = 2130,
        OneKgAmrAn = 694,
        OneKgAmrAc = 250
    };
    var dbSnpEntry = new DbSnpAnnotation { DbSnp = new List<long> { 11554290 } };

    // 1000 Genomes data is registered before dbSNP, same order as before.
    var creator = new SupplementaryPositionCreator(position);
    creator.AddExternalDataToAsa(DataSourceCommon.DataSource.OneKg, "C", oneKgEntry);
    creator.AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, "C", dbSnpEntry);

    var mockReader = new MockSupplementaryAnnotationReader(position);
    VcfUtilities.FieldContains(mockReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Registers dbSNP and 1000 Genomes data for allele "C" only at position 825069,
// then checks the INFO column of a two-alt-allele (A,C) VCF line for
// "AF1000G=.,0.677316" through VcfUtilities.FieldContains.
public void OneAlleleFreqMissing()
{
    // NOTE(review): the columns in this literal are separated by single spaces as
    // seen here; confirm the original literal did not contain tab characters.
    const string vcfLine = "chr1 825069 rs4475692 G A,C 362.00 LowGQX;HighDPFRatio SNVSB=-36.9;SNVHPOL=3 GT:GQ:GQX:DP:DPF:AD 1/2:4:0:52:38:8,11,33";
    const string expectedInfo = "AF1000G=.,0.677316";

    var position = new SupplementaryAnnotationPosition(825069);
    var creator = new SupplementaryPositionCreator(position);

    var dbSnpEntry = new DbSnpAnnotation { DbSnp = new List<long> { 4475692 } };
    var oneKgEntry = new OneKGenAnnotation { OneKgAllAn = 5008, OneKgAllAc = 3392 };

    // dbSNP is registered before 1000 Genomes, same order as before.
    creator.AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, "C", dbSnpEntry);
    creator.AddExternalDataToAsa(DataSourceCommon.DataSource.OneKg, "C", oneKgEntry);

    var mockReader = new MockSupplementaryAnnotationReader(position);
    VcfUtilities.FieldContains(mockReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Adds two COSMIC items (COSM1000, COSM1001) for the same T>C change at
// chr1:115256529 to a supplementary annotation position, then checks the INFO
// column of the VCF line for "cosmic=1|COSM1000" through VcfUtilities.FieldContains.
public void CosmicOutputTest()
{
    const string alt = "C";
    const string vcfLine = "chr1\t115256529\t.\tT\tC\t1000\tPASS\t.\tGT\t0/1";
    const string expectedInfo = "cosmic=1|COSM1000";

    var position = new SupplementaryAnnotationPosition(115256529);
    var creator = new SupplementaryPositionCreator(position);

    var oesophagusStudies = new HashSet<CosmicItem.CosmicStudy>
    {
        new CosmicItem.CosmicStudy("", "carcinoma", "oesophagus")
    };
    var firstItem = new CosmicItem("chr1", 115256529, "COSM1000", "T", alt, "TP53", oesophagusStudies, 1, alt);

    var intestineStudies = new HashSet<CosmicItem.CosmicStudy>
    {
        new CosmicItem.CosmicStudy("01", "carcinoma", "large_intestine")
    };
    var secondItem = new CosmicItem("chr1", 115256529, "COSM1001", "T", alt, "TP53", intestineStudies, 1, alt);

    // Both items go into the same creator, COSM1000 first.
    firstItem.AddCosmicToSa(creator);
    secondItem.AddCosmicToSa(creator);

    var mockReader = new MockSupplementaryAnnotationReader(position);
    VcfUtilities.FieldContains(mockReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Reference-site line at 2:190634103 checked against the
// chr2_190634103_190634104.nsa resource: the INFO column is expected to be ".".
public void SpuriousRefMinor()
{
    const string vcfLine = "2 190634103 . C . . HighDPFRatio .";
    const string expectedInfo = ".";

    var annotationPath = Resources.MiniSuppAnnot("chr2_190634103_190634104.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Reads the fourth variant of Nirvana_unified_json_format.vcf and compares the
// string form of each of its three extracted samples with the expected JSON,
// including an empty sample in the middle.
public void EmptySamplesTest()
{
    const string expectedFirst = "{\"variantFreq\":0.4211,\"genotypeQuality\":124,\"alleleDepths\":[11,8],\"genotype\":\"0/1\"}";
    const string expectedSecond = "{\"isEmpty\":true}";
    const string expectedThird = "{\"variantFreq\":1,\"totalDepth\":20,\"genotypeQuality\":55,\"alleleDepths\":[0,21],\"genotype\":\"1/2\",\"failedFilter\":true}";

    using (var reader = new LiteVcfReader(ResourceUtilities.GetReadStream(Resources.InputFiles("Nirvana_unified_json_format.vcf"))))
    {
        // getting the 4th variant: discard the first three, keep the next one
        for (var skipped = 0; skipped < 3; skipped++)
        {
            VcfUtilities.GetNextVariant(reader, _renamer);
        }

        var fourthVariant = VcfUtilities.GetNextVariant(reader, _renamer);

        // GT:GQ:GQX:DPI:AD
        // 0/1:124:19:5:11,8:PASS:.
        // .
        // 1/2:55:59:.:0,21:LowGQX:20

        // the unified json will call this function to get all the samples and
        // can print them out using GetEntry as shown below
        var samples = fourthVariant.ExtractSampleInfo();

        Assert.Equal(expectedFirst, samples[0].ToString());
        Assert.Equal(expectedSecond, samples[1].ToString());
        Assert.Equal(expectedThird, samples[2].ToString());
    }
}
// Reads the first variant of Nirvana_unified_json_format.vcf and checks the
// reader's sample names, then each extracted sample's genotype and string form.
public void JsonSamplesOutput()
{
    // LP2000021 LP2000022 LP2000023
    string[] expectedNames = { "LP2000021", "LP2000022", "LP2000023" };
    string[] expectedGenotypes = { "0/0", "0/0", "0/1" };
    string[] expectedJson =
    {
        "{\"totalDepth\":31,\"genotypeQuality\":90,\"genotype\":\"0/0\"}",
        "{\"totalDepth\":26,\"genotypeQuality\":75,\"genotype\":\"0/0\"}",
        "{\"variantFreq\":0.4444,\"totalDepth\":36,\"genotypeQuality\":161,\"alleleDepths\":[20,16],\"genotype\":\"0/1\"}"
    };

    using (var reader = new LiteVcfReader(ResourceUtilities.GetReadStream(Resources.InputFiles("Nirvana_unified_json_format.vcf"))))
    {
        for (var i = 0; i < expectedNames.Length; i++)
        {
            Assert.Equal(expectedNames[i], reader.SampleNames[i]);
        }

        var firstVariant = VcfUtilities.GetNextVariant(reader, _renamer);

        // chr9 138685463 . A C . PASS BaseQRankSum=-1.61165;GQ=120;DP=43;ReadPosRankSum=0;MQ=60;SNVHPOL=3;SNVSB=-65.7;MQRankSum=0 GT:GQX:GQ:DP:DPF:AD 0/0:90:0:31:0:. 0/0:75:0:26:0:. 0/1:161:194:36:0:20,16

        // the unified json will call this function to get all the samples and
        // can print them out using GetEntry as shown below
        var samples = firstVariant.ExtractSampleInfo();

        // All genotype checks run before any JSON check, as before.
        for (var i = 0; i < expectedGenotypes.Length; i++)
        {
            Assert.Equal(expectedGenotypes[i], samples[i].Genotype);
        }

        for (var i = 0; i < expectedJson.Length; i++)
        {
            Assert.Equal(expectedJson[i], samples[i].ToString());
        }
    }
}
// Reference-site line at X:1619046 whose input INFO column is "RefMinor",
// checked against chrX_1619046_1619046.nsa: the resulting INFO column is
// expected to be ".".
public void SpuriousRefMinor2()
{
    const string vcfLine = "X 1619046 . C . . LowGQX RefMinor GT:GQX:DP:DPF 0/0:8:38:12";
    const string expectedInfo = ".";

    var annotationPath = Resources.MiniSuppAnnot("chrX_1619046_1619046.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Deletion CGAG>C at 17:21319650 checked against chr17_21319650_21319651.nsa:
// the INFO column is expected to keep the input keys and gain
// "cosmic=1|COSM278475".
public void CosmicMultiDelete()
{
    const string vcfLine = "17 21319650 . CGAG C 101 PASS CIGAR=1M3D;RU=GAG;REFREP=2;IDREP=1 GT:GQ:GQX:DPI:AD 0/1:141:101:29:22,4";
    const string expectedInfo = "CIGAR=1M3D;RU=GAG;REFREP=2;IDREP=1;cosmic=1|COSM278475";

    var annotationPath = Resources.MiniSuppAnnot("chr17_21319650_21319651.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Deletion TG>T at 17:77263 checked against chr17_77263_77265.nsa: the expected
// INFO column carries AA and AF1000G after the input keys and no RefMinor flag.
public void NoRefMinorForDeletion()
{
    const string vcfLine = "17 77263 . TG T 428 PASS CIGAR=1M1D;RU=G;REFREP=4;IDREP=3 GT:GQ:GQX:DPI:AD 1/1:33:30:12:0,11";
    const string expectedInfo = "CIGAR=1M1D;RU=G;REFREP=4;IDREP=3;AA=GGG;AF1000G=1";

    var annotationPath = Resources.MiniSuppAnnot("chr17_77263_77265.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Structural-variant (<DEL>) line whose input ID column is
// "MantaDEL:164:0:1:1:0:0;rs123": the expected ID column is
// "MantaDEL:164:0:1:1:0:0" only.
public void DbSnpIds()
{
    // NOTE(review): the literal below is reproduced exactly as found, including the
    // space inside "Colocaliz edCanvas" and the space-separated columns; confirm
    // against the original source that no whitespace was altered.
    const string vcfLine = "1 1594584 MantaDEL:164:0:1:1:0:0;rs123 C <DEL> . MGE10kb END=1660503;SVTYPE=DEL;SVLEN=-65919;IMPRECISE;CIPOS=-285,285;CIEND=-205,205;SOMATIC;SOMATICSCORE=36;Colocaliz edCanvas PR 42,0 226,9";
    const string expectedId = "MantaDEL:164:0:1:1:0:0";

    var annotationPath = Resources.MiniSuppAnnot("chr1_129010_129012.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedId, VcfCommon.IdIndex);
}
// Line at chr1:129010 with input ID "rs377161483", checked against
// chr1_129010_129012.nsa: the ID column is expected to read "rs377161483".
public void MissingRsid()
{
    const string vcfLine = "chr1 129010 rs377161483 AATG A 32 LowGQXHetAltDel CIGAR=1M1D1M,2M1D;RU=C,A;REFREP=1,17;IDREP=0,16 GT:GQ:GQX:DPI:AD 1/2:162:2:22:4,8,1";
    const string expectedId = "rs377161483";

    var annotationPath = Resources.MiniSuppAnnot("chr1_129010_129012.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedId, VcfCommon.IdIndex);
}
// Insertion A>AT at 17:186913 with input ID "rs34543275,rs11453667", checked
// against chr17_186913_186914.nsa: the expected ID column lists both ids as
// "rs11453667;rs34543275".
public void MultiDbSnpOutput()
{
    const string vcfLine = "17 186913 rs34543275,rs11453667 A AT 111 LowGQX CIGAR=1M1I;RU=T;REFREP=11;IDREP=12;GMAF=AC|0.002995;AF1000G=0.748003;CSQT=1|RPH3AL|ENST00000331302|intron_variant&feature_elongation,1||ENST00000575743|downstream_gene_variant GT:GQ:GQX:DPI:AD 1/1:21:18:9:0,7";
    const string expectedId = "rs11453667;rs34543275";

    var annotationPath = Resources.MiniSuppAnnot("chr17_186913_186914.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedId, VcfCommon.IdIndex);
}
// Reference-site line at 17:77264 checked against chr17_77263_77265.nsa: the
// INFO column is expected to come back identical to the input INFO column.
public void NotPotentialRefMinor()
{
    const string vcfLine = "17 77264 . G . 428 PASS END=77265;CIGAR=1M1D;RU=G;REFREP=4;IDREP=3 GT:GQ:GQX:DPI:AD 1/1:33:30:12:0,11";
    const string expectedInfo = "END=77265;CIGAR=1M1D;RU=G;REFREP=4;IDREP=3";

    var annotationPath = Resources.MiniSuppAnnot("chr17_77263_77265.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Reference-site line at 5:29786207 that already carries AF1000G, GMAF and
// RefMinor in its INFO column, checked against chr5_29786207_29786208.nsa:
// the expected INFO column has RefMinor and "GMAF=C|0.005591" and no AF1000G.
public void DuplicateOneKgFreq()
{
    const string vcfLine = "5 29786207 rs150619197 C . . SiteConflict;LowGQX END=29786207;BLOCKAVG_min30p3a;AF1000G=.,0.994409;GMAF=A|0.9944;RefMinor GT:GQX:DP:DPF 0:24:9:0";
    const string expectedInfo = "END=29786207;BLOCKAVG_min30p3a;RefMinor;GMAF=C|0.005591";

    var annotationPath = Resources.MiniSuppAnnot("chr5_29786207_29786208.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Parses a <REPEAT:EXPANSION> line at chrX:146993568 and expects the first
// alternate allele's Start to be 146993569 (one past the VCF position).
public void RepeatExpansion()
{
    const string repeatLine = "chrX 146993568 FMR1 G <REPEAT:EXPANSION> 1.0 NoSuppReads REPEAT_COUNT1=30,33";
    const int expectedStart = 146993569;

    var parsedVariant = VcfUtilities.GetVariant(repeatLine, _renamer);
    var firstAltAllele = parsedVariant.AlternateAlleles[0];

    Assert.Equal(expectedStart, firstAltAllele.Start);
}
// SNV C>T at 17:641336 whose input INFO column contains "EVS=|22|6254" and a
// phyloP entry, checked against chr17_641334_641337.nsa: the expected INFO
// column has neither and ends with "cosmic=1|COSN6415581".
public void EvsWrongAltValue()
{
    const string vcfLine = "17 641336 rs60947910 C T 9 LowGQX SNVSB=0.0;SNVHPOL=19;AA=C;GMAF=T|0.1835;AF1000G=0.183506;EVS=|22|6254;phyloP=-1.271 GT:GQ:GQX:DP:DPF:AD 0/1:17:9:3:2:1,2";
    const string expectedInfo = "SNVSB=0.0;SNVHPOL=19;AA=C;GMAF=T|0.1835;AF1000G=0.183506;cosmic=1|COSN6415581";

    var annotationPath = Resources.MiniSuppAnnot("chr17_641334_641337.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Reference-site line at 2:193187632 with an empty (".") INFO column, checked
// against chr2_193187632_193187633.nsa: the expected INFO column is
// "RefMinor;GMAF=G|0.01937".
public void MissingRefMinorAnnotation()
{
    const string vcfLine = "2 193187632 . G . . LowGQX;HighDPFRatio . GT:GQX:DP:DPF .:.:0:2";
    const string expectedInfo = "RefMinor;GMAF=G|0.01937";

    var annotationPath = Resources.MiniSuppAnnot("chr2_193187632_193187633.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Line at X:1389061 checked against chrX_1389061_1389062.nsa: the INFO column
// must not contain "RefMinor".
public void DuplicateEntryRefMinor()
{
    // the following entry should not get refMinor tag. It has conflicting entries
    // in 1kg and should have no allele frequency related info
    const string vcfLine = "X 1389061 . A C 100 PASS AC=3235";
    const string forbiddenTag = "RefMinor";

    var annotationPath = Resources.MiniSuppAnnot("chrX_1389061_1389062.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldDoesNotContain(annotationReader, vcfLine, forbiddenTag, VcfCommon.InfoIndex);
}
// Variant line (T>C) at 1:789256 checked against chr1_789256_789257.nsa: the
// expected INFO column holds GMAF and AF1000G and no RefMinor flag.
public void VariantSiteRefMinor()
{
    const string vcfLine = "1 789256 rs3131939 T C . LowGQX . GT:GQX:DP:DPF:AD 0:.:0:0:0";
    const string expectedInfo = "GMAF=T|0.005192;AF1000G=0.994808";

    var annotationPath = Resources.MiniSuppAnnot("chr1_789256_789257.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Reference-site line (alt ".") at 1:789256 checked against
// chr1_789256_789257.nsa: the expected INFO column keeps END and adds
// RefMinor and GMAF.
public void RefSiteRefMinor()
{
    const string vcfLine = "1 789256 rs3131939 T . . LowGQX END=789256 GT:GQX:DP:DPF:AD 0:.:0:0:0";
    const string expectedInfo = "END=789256;RefMinor;GMAF=T|0.005192";

    var annotationPath = Resources.MiniSuppAnnot("chr1_789256_789257.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// Multi-allelic line (TCA>TA,TC) at 1:226259211 whose input INFO column contains
// "EVS=|6|5096" and a CSQT entry, checked against chr1_226259211_226259213.nsa:
// the expected INFO column holds only the CIGAR/RU/REFREP/IDREP keys.
public void MissingEvsValue()
{
    const string vcfLine = "1 226259211 rs375001380;rs397983063 TCA TA,TC 32 LowGQXHetAltDel CIGAR=1M1D1M,2M1D;RU=C,A;REFREP=1,17;IDREP=0,16;EVS=|6|5096;CSQT=1|H3F3A|ENST00000366813|3_prime_UTR_variant&feature_truncation,2|H3F3A|ENST00000366813|3_prime_UTR_variant&feature_truncation GT:GQ:GQX:DPI:AD 1/2:162:2:22:4,8,1";
    const string expectedInfo = "CIGAR=1M1D1M,2M1D;RU=C,A;REFREP=1,17;IDREP=0,16";

    var annotationPath = Resources.MiniSuppAnnot("chr1_226259211_226259213.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// SNV C>G at 17:3616153 with input ID "rs34081014", checked against
// chr17_3616153_3616154.nsa: the expected ID column is "rs34081014;rs71362546".
public void MissingDbsnpId()
{
    const string vcfLine = "17 3616153 rs34081014 C G 48 PASS SNVSB=0.0;SNVHPOL=2;AA=C;GMAF=G|0.07029;AF1000G=0.0702875;phyloP=0.444;CSQT=1|ITGAE|ENST00000263087|downstream_gene_variant;CSQR=1|ENSR00001339304|regulatory_region_variant GT:GQ:GQX:DP:DPF:AD 0/1:47:44:6:0:2,4";
    const string expectedId = "rs34081014;rs71362546";

    var annotationPath = Resources.MiniSuppAnnot("chr17_3616153_3616154.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedId, VcfCommon.IdIndex);
}
// Parses a breakend (SVTYPE=BND) line at chr4:92647305 whose mate lies on contig
// JTFH01000254.1 and checks the VariantId of the first alternate allele.
public void MissingVid()
{
    const string breakendLine = "chr4 92647305 MantaBND:1703:0:1:0:0:0:0 A A[JTFH01000254.1:1952[ 49 MinGQ SVTYPE=BND;MATEID=MantaBND:1703:0:1:0:0:0:1;CIPOS=0,2;HOMLEN=2;HOMSEQ=AG;BND_DEPTH=6;MATE_BND_DEPTH=3 GT:FT:GQ:PL:PR:SR 0/0:MinGQ:7:44,0,18:1,0:0,1 0/0:MinGQ:12:39,0,45:1,2:1,0 1/1:MinGQ:5:99,6,0:0,0:0,2";
    const string expectedVid = "4:92647305:+:JTFH01000254.1:1952:+";

    var parsedVariant = VcfUtilities.GetVariant(breakendLine, _renamer);
    var firstAltAllele = parsedVariant.AlternateAlleles[0];

    Assert.Equal(expectedVid, firstAltAllele.VariantId);
}
// SNV C>T at 1:241369 whose input INFO column contains a CSQ entry, checked
// against chr1_241369_241370.nsa: the expected INFO column is
// "SNVSB=0.0;SNVHPOL=3;AF1000G=1", with no GMAF entry.
public void NoGlobalMinorAllele()
{
    const string vcfLine = "1 241369 . C T 77 LowGQXHomSNP SNVSB=0.0;SNVHPOL=3;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|AP006222.2|ENSG00000228463|Transcript|ENST00000424587|lincRNA||2/3|ENST00000424587.2:n.264-2802G>A|||||||||-1|Clone_based_vega_gene||YES|||||||||,T|upstream_gene_variant|MODIFIER|AP006222.2|ENSG00000228463|Transcript|ENST00000448958|lincRNA|||||||||||2811|-1|Clone_based_vega_gene||||||||||| GT:GQ:GQX:DP:DPF:AD 1/1:12:13:5:2:0,5";
    const string expectedInfo = "SNVSB=0.0;SNVHPOL=3;AF1000G=1";

    var annotationPath = Resources.MiniSuppAnnot("chr1_241369_241370.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// NIR-941
// Parses a Canvas <CNV> line on chr1 and checks that the VariantId of the first
// alternate allele is "1:814867:824517:4" (chromosome written without "chr").
public void SvVidChrName()
{
    // NOTE(review): this literal mixes space-separated columns with one "\t"
    // escape before the sample column; reproduced exactly as found.
    const string cnvLine = "chr1 814866 Canvas:GAIN:chr1:814867:824517 N <CNV> 4 q10;CLT10kb SVTYPE=CNV;END=824517;CSQ=CNV|upstream_gene_variant|MODIFIER|FAM41C|284593|Transcript|NR_027055.1|misc_RNA|||||||||||2685|-1|||YES|||rseq_mrna_match&rseq_ens_no_match||||||| RC:BC:CN\t214:7:4";
    const string expectedVid = "1:814867:824517:4";

    var parsedVariant = VcfUtilities.GetVariant(cnvLine, _renamer);
    var firstAltAllele = parsedVariant.AlternateAlleles[0];

    // the naming convention for breakends is different
    Assert.Equal(expectedVid, firstAltAllele.VariantId);
}
// NIR-941
// Parses a breakend line at chr1:9121449 with alt "[chr14:93712486[C" and checks
// the VariantId of the first alternate allele.
public void BreakendVid2()
{
    const string breakendLine = "chr1 9121449 MantaBND:542:0:2:0:0:0:0 C [chr14:93712486[C 518 PASS SVTYPE=BND;MATEID=MantaBND:542:0:2:0:0:0:1;CIPOS=0,4;HOMLEN=4;HOMSEQ=CCTG;BND_DEPTH=49;MATE_BND_DEPTH=48 GT:GQ:PR:SR 0/1:518:33,2:32,15";
    const string expectedVid = "1:9121449:-:14:93712486:+";

    var parsedVariant = VcfUtilities.GetVariant(breakendLine, _renamer);
    var firstAltAllele = parsedVariant.AlternateAlleles[0];

    // the naming convention for breakends is different
    Assert.Equal(expectedVid, firstAltAllele.VariantId);
}
// NIR-941
// Parses a breakend line at chr1:797265 with alt "G]chr8:245687]" and checks the
// VariantId of the first alternate allele.
public void BreakendVid1()
{
    const string breakendLine = "chr1 797265 MantaBND:10:0:1:0:2:0:0 G G]chr8:245687] 55 PASS SVTYPE=BND;MATEID=MantaBND:10:0:1:0:2:0:1;CIPOS=0,31;HOMLEN=31;HOMSEQ=ATTGATAGATGATAGGTAGATAGTAGATAGA;BND_DEPTH=59;MATE_BND_DEPTH=41 GT:GQ:PR:SR 0/1:55:39,6:20,3";
    const string expectedVid = "1:797265:+:8:245687:-";

    var parsedVariant = VcfUtilities.GetVariant(breakendLine, _renamer);
    var firstAltAllele = parsedVariant.AlternateAlleles[0];

    // the naming convention for breakends is different
    Assert.Equal(expectedVid, firstAltAllele.VariantId);
}
// Somatic SNV A>C at X:857972 checked against chrX_857972_857973.nsa: the
// expected INFO column drops the input phyloP entry and ends with
// "GMAF=A|0.4323;AF1000G=0.567692".
public void MissingAFinX()
{
    const string vcfLine = "X 857972 . A C . LowQscore SOMATIC;QSS=1;TQSS=2;NT=ref;QSS_NT=1;TQSS_NT=2;SGT=AC->AC;DP=143;MQ=58.73;MQ0=5;ALTPOS=6;ALTMAP=5;ReadPosRankSum=-2.92;SNVSB=0.00;PNOISE=0.00;PNOISE2=0.00;VQSR=1.98;phyloP=-1.187 DP:FDP:SDP:SUBDP:AU:CU:GU:TU 58:1:0:0:57,58:0,4:0,0:0,0 74:1:0:0:69,70:4,11:0,0:0,0";
    const string expectedInfo = "SOMATIC;QSS=1;TQSS=2;NT=ref;QSS_NT=1;TQSS_NT=2;SGT=AC->AC;DP=143;MQ=58.73;MQ0=5;ALTPOS=6;ALTMAP=5;ReadPosRankSum=-2.92;SNVSB=0.00;PNOISE=0.00;PNOISE2=0.00;VQSR=1.98;GMAF=A|0.4323;AF1000G=0.567692";

    var annotationPath = Resources.MiniSuppAnnot("chrX_857972_857973.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}
// SNV T>C at 17:2266812 whose input INFO column carries GMAF, EVS, phyloP,
// cosmic, clinvar and CSQT entries, checked against chr17_2266812_2266813.nsa:
// the expected INFO column has different GMAF, EVS and cosmic values and no
// phyloP, clinvar or CSQT entry.
public void AlleleSpecificClinvarVcf()
{
    const string vcfLine = "17 2266812 rs2003968 T C 112 LowGQX SNVSB=-8.7;SNVHPOL=3;AA=C;GMAF=C|0.5663;AF1000G=0.566294;EVS=0.4839|27|6502;phyloP=-5.078;cosmic=COSM1563374;clinvar=1|other;CSQT=1|SGSM2|ENST00000268989|synonymous_variant GT:GQ:GQX:DP:DPF:AD 1/1:21:21:8:1:0,8";
    const string expectedInfo = "SNVSB=-8.7;SNVHPOL=3;AA=C;GMAF=T|0.4337;AF1000G=0.566294;EVS=0.483928|27|6502;cosmic=1|COSM1563374";

    var annotationPath = Resources.MiniSuppAnnot("chr17_2266812_2266813.nsa");
    var annotationReader = ResourceUtilities.GetSupplementaryAnnotationReader(annotationPath);

    VcfUtilities.FieldEquals(annotationReader, vcfLine, expectedInfo, VcfCommon.InfoIndex);
}