/// <summary>
/// Builds the test class from the shared fixture: stores the fixture's renamer,
/// sequence and reader, then creates a <c>OneKGenReader</c> around the stored renamer.
/// </summary>
/// <param name="fixture">Shared fixture exposing <c>Renamer</c>, <c>Sequence</c> and <c>Reader</c>.</param>
public MergeSaTests(ChromosomeRenamerFixture fixture)
{
    // copy the shared state out of the fixture
    _renamer  = fixture.Renamer;
    _sequence = fixture.Sequence;
    _reader   = fixture.Reader;

    // the 1000 Genomes reader is built from the renamer captured above
    _oneKGenReader = new OneKGenReader(_renamer);
}
/// <summary>
/// Parses a 1000 Genomes VCF line carrying two alternate alleles (G and T) and checks
/// that two items are produced, each reporting the expected overall and
/// per-population allele frequencies in its JSON string.
/// </summary>
public void MultiAltAlleleTest()
{
    const string vcfLine = "1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(15274, "A", 'C', _refChromDict);
    var reader           = new OneKGenReader(null, sequenceProvider);
    var parsedItems      = reader.ExtractItems(vcfLine).ToList();

    Assert.Equal(2, parsedItems.Count);

    var firstAltJson  = parsedItems[0].GetJsonString();
    var secondAltJson = parsedItems[1].GetJsonString();

    // frequency keys, in the order they are checked
    string[] frequencyKeys = { "allAf", "afrAf", "amrAf", "easAf", "eurAf", "sasAf" };

    // expected values for the first alternate allele
    string[] expectedFirstAlt = { "0.347244", "0.322995", "0.275216", "0.481151", "0.292247", "0.349693" };

    // expected values for the second alternate allele
    string[] expectedSecondAlt =
    {
        "0.640974",
        "0.636914",
        "0.720461",
        "0.518849",
        "0.707753", //double check this one: 0.7077535
        "0.647239"
    };

    for (var i = 0; i < frequencyKeys.Length; i++)
    {
        Assert.Equal(expectedFirstAlt[i], GetAlleleFrequency(firstAltJson, frequencyKeys[i]));
    }

    for (var i = 0; i < frequencyKeys.Length; i++)
    {
        Assert.Equal(expectedSecondAlt[i], GetAlleleFrequency(secondAltJson, frequencyKeys[i]));
    }
}
/// <summary>
/// Feeds a dbSNP line and a 1000 Genomes line for the same position into two separate
/// position creators, merges the second into the first, finalizes the positional
/// annotations and checks the resulting global major ("C") and minor ("G") alleles.
/// </summary>
public void MergeDbSnp1Kg()
{
    //NIR-1262
    const int position = 825069;

    const string vcfLine = "1 825069 rs4475692 G A,C . . RS=4475692;RSPOS=825069;dbSNPBuildID=111;SSR=0;SAO=0;VP=0x050100000005170126000100;WGT=1;VC=SNV;SLO;ASP;VLD;G5A;G5;GNO;KGPhase3;CAF=0.3227,.,0.6773;COMMON=1";
    const string vcfLine1Kg = "1 825069 rs4475692 G C 100 PASS AC=3392;AF=0.677316;AN=5008;NS=2504;DP=22495;EAS_AF=0.754;AMR_AF=0.5692;AFR_AF=0.6127;EUR_AF=0.7286;SAS_AF=0.7096;AA=g|||;VT=SNP;EAS_AN=1008;EAS_AC=760;EUR_AN=1006;EUR_AC=733;AFR_AN=1322;AFR_AC=810;AMR_AN=694;AMR_AC=395;SAS_AN=978;SAS_AC=694\tGT";

    // dbSNP entries go into the first creator
    var dbSnpSource  = new DbSnpReader(_renamer);
    var dbSnpCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));

    foreach (var entry in dbSnpSource.ExtractItem(vcfLine))
    {
        entry.SetSupplementaryAnnotations(dbSnpCreator);
    }

    // 1000 Genomes entries go into the second creator
    var oneKgCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
    var oneKgSource  = new OneKGenReader(_renamer);

    foreach (var entry in oneKgSource.ExtractItems(vcfLine1Kg))
    {
        entry.SetSupplementaryAnnotations(oneKgCreator);
    }

    // merge, finalize, and inspect the global alleles
    dbSnpCreator.MergeSaCreator(oneKgCreator);
    dbSnpCreator.FinalizePositionalAnnotations();

    var mergedPosition = dbSnpCreator.SaPosition;
    Assert.Equal("C", mergedPosition.GlobalMajorAllele);

    Assert.Equal("G", dbSnpCreator.SaPosition.GlobalMinorAllele);
}
/// <summary>
/// Parses a single-alt deletion line and checks that exactly one item is produced and
/// that its JSON string matches the expected frequency, allele-number and
/// allele-count fields exactly.
/// </summary>
public void MissingSubPopulationFrequencies()
{
    const string refAllele = "CCGCCGTTGCAAAGGCGCGCCG";

    const string vcfLine = "1\t10616\trs376342519\tCCGCCGTTGCAAAGGCGCGCCG\tC\t100\tPASS\tAN=5008;AC=4973;AF=0.993011;AA=;EAS_AN=1008;EAS_AC=999;EAS_AF=0.9911;EUR_AN=1006;EUR_AC=1000;EUR_AF=0.994;AFR_AN=1322;AFR_AC=1308;AFR_AF=0.9894;AMR_AN=694;AMR_AC=691;AMR_AF=0.9957;SAS_AN=978;SAS_AC=975;SAS_AF=0.9969";

    const string expectedJson = "\"allAf\":0.993011,\"afrAf\":0.98941,\"amrAf\":0.995677,\"easAf\":0.991071,\"eurAf\":0.994036,\"sasAf\":0.996933,\"allAn\":5008,\"afrAn\":1322,\"amrAn\":694,\"easAn\":1008,\"eurAn\":1006,\"sasAn\":978,\"allAc\":4973,\"afrAc\":1308,\"amrAc\":691,\"easAc\":999,\"eurAc\":1000,\"sasAc\":975";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10616, refAllele, 'C', ChromosomeUtilities.RefNameToChromosome);
    var reader           = new OneKGenReader(null, sequenceProvider);

    var parsedItems = reader.ExtractItems(vcfLine).ToList();

    Assert.Single(parsedItems);
    Assert.Equal(expectedJson, parsedItems[0].GetJsonString());
}
/// <summary>
/// Builds the reference-name dictionary used by the tests (entries for "1", "4" and
/// "X") and creates a <c>OneKGenReader</c> on top of it.
/// </summary>
public OneKGenTests()
{
    var chromosomesByName = new Dictionary<string, IChromosome>();
    chromosomesByName.Add("1", new Chromosome("chr1", "1", 0));
    chromosomesByName.Add("4", new Chromosome("chr4", "4", 3));
    chromosomesByName.Add("X", new Chromosome("chrX", "X", 22));

    _refChromDict  = chromosomesByName;
    _oneKGenReader = new OneKGenReader(_refChromDict);
}
/// <summary>
/// Loads one EVS, one ExAC and one 1000 Genomes record for the same position into
/// separate position creators, merges the ExAC and 1000 Genomes creators into the EVS
/// one, and checks that the merged "G" allele carries all three annotations with the
/// expected frequencies.
/// </summary>
public void Merge1KgEvsExac()
{
    const int position = 13382;

    const string evsVcfLine = "1 13382 rs191719684 C G . PASS DBSNP=dbSNP_135;EA_AC=0,8600;AA_AC=17,4389;TAC=17,12989;MAF=0.0,0.3858,0.1307;GTS=GG,GC,CC;EA_GTC=0,0,4300;AA_GTC=0,17,2186;GTC=0,17,6486;DP=54;GL=SAMD11;CP=0.0;CG=1.5;AA=C;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_152486.2:intron;HGVS_CDNA_VAR=NM_152486.2:c.-30C>G;HGVS_PROTEIN_VAR=.;CDS_SIZES=NM_152486.2:2046;GS=.;PH=.;EA_AGE=.;AA_AGE=24.3+/-50.5";
    const string exacVcfLine = "1 13382 . C G 320.40 VQSRTrancheSNP99.60to99.80 AC=3;AC_AFR=0;AC_AMR=0;AC_Adj=1;AC_EAS=0;AC_FIN=0;AC_Het=1;AC_Hom=0;AC_NFE=0;AC_OTH=0;AC_SAS=1;AF=1.079e-04;AN=27810;AN_AFR=460;AN_AMR=82;AN_Adj=5728;AN_EAS=148;AN_FIN=4;AN_NFE=1400;AN_OTH=60;AN_SAS=3574;BaseQRankSum=-8.880e-01;ClippingRankSum=0.493;DP=86138;FS=0.000;GQ_MEAN=11.35;GQ_STDDEV=12.58;Het_AFR=0;Het_AMR=0;Het_EAS=0;Het_FIN=0;Het_NFE=0;Het_OTH=0;Het_SAS=1;Hom_AFR=0;Hom_AMR=0;Hom_EAS=0;Hom_FIN=0;Hom_NFE=0;Hom_OTH=0;Hom_SAS=0;InbreedingCoeff=-0.0832;MQ=34.49;MQ0=0;MQRankSum=-6.910e-01;NCC=72140;QD=20.03;ReadPosRankSum=-2.073e+00;VQSLOD=-4.106e+00;culprit=MQ;DP_HIST=9135|1821|1658|665|130|135|199|110|41|8|2|1|0|0|0|0|0|0|0|0,1|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=1432|8682|140|118|2625|254|121|17|3|1|10|24|364|94|9|11|0|0|0|0,0|1|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0|0;DOUBLETON_DIST=.;AC_MALE=1;AC_FEMALE=0;AN_MALE=3866;AN_FEMALE=1862;AC_CONSANGUINEOUS=0;AN_CONSANGUINEOUS=684;Hom_CONSANGUINEOUS=0";
    const string oneKgVcfLine = "1 13382 rs538606945 C G 100 PASS AC=1;AF=0.000199681;AN=5008;NS=2504;DP=28817;EAS_AF=0;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0.001;AA=c|||;VT=SNP";

    // EVS record -> first creator
    var evsCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
    var evsSource  = new EvsReader(_renamer);
    var evsRecord  = evsSource.ExtractItems(evsVcfLine)[0];
    evsRecord.SetSupplementaryAnnotations(evsCreator);

    // ExAC record -> second creator
    var exacCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
    var exacSource  = new ExacReader(_renamer);
    var exacRecord  = exacSource.ExtractItems(exacVcfLine)[0];
    exacRecord.SetSupplementaryAnnotations(exacCreator);

    // 1000 Genomes record -> third creator
    var oneKgCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
    var oneKgSource  = new OneKGenReader(_renamer);
    var oneKgRecord  = oneKgSource.ExtractItems(oneKgVcfLine)[0];
    oneKgRecord.SetSupplementaryAnnotations(oneKgCreator);

    // fold everything into the EVS creator
    evsCreator.MergeSaCreator(exacCreator);
    evsCreator.MergeSaCreator(oneKgCreator);

    var mergedAllele = evsCreator.SaPosition.AlleleSpecificAnnotations["G"];

    var exacAnnotation  = mergedAllele.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
    var oneKgAnnotation = mergedAllele.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
    var evsAnnotation   = mergedAllele.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

    // all three data sources must be present on the merged allele
    Assert.NotNull(exacAnnotation);
    Assert.NotNull(oneKgAnnotation);
    Assert.NotNull(evsAnnotation);

    var oneKgAlleleCount  = oneKgAnnotation.OneKgAllAc;
    var oneKgAlleleNumber = oneKgAnnotation.OneKgAllAn;
    var exacAlleleCount   = exacAnnotation.ExacAllAc;
    var exacAlleleNumber  = exacAnnotation.ExacAllAn;

    Assert.NotNull(oneKgAlleleCount);
    Assert.NotNull(oneKgAlleleNumber);
    Assert.NotNull(exacAlleleCount);
    Assert.NotNull(exacAlleleNumber);

    // frequencies are derived as count / number and formatted with the shared rounding format
    Assert.Equal(
        "0.0002",
        (oneKgAlleleCount.Value / (double)oneKgAlleleNumber.Value).ToString(JsonCommon.FrequencyRoundingFormat));

    Assert.Equal("0.001307", evsAnnotation.EvsAll);

    Assert.Equal(
        "0.000175",
        (exacAlleleCount.Value / (double)exacAlleleNumber.Value).ToString(JsonCommon.FrequencyRoundingFormat));
}
/// <summary>
/// Parses a single insertion line and checks the overall and per-population allele
/// frequencies in the first item's JSON string, and that the JSON does not contain
/// an "ancestralAllele" entry.
/// </summary>
public void AlleleFrequencyTest()
{
    const string vcfLine = "1 10352 rs555500075 T TA 100 PAS AC=2191;AF=0.4375;AN=5008;NS=2504;DP=88915;EAS_AF=0.4306;AMR_AF=0.4107;AFR_AF=0.4788;EUR_AF=0.4264;SAS_AF=0.4192;AA=|||unknown(NO_COVERAGE); VT=INDEL;EAS_AN=1008;EAS_AC=434;EUR_AN=1006;EUR_AC=429;AFR_AN=1322;AFR_AC=633;AMR_AN=694;AMR_AC=285;SAS_AN=978;SAS_AC=410";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10352, "T", 'C', _refChromDict);
    var reader           = new OneKGenReader(null, sequenceProvider);

    // only the first extracted item is inspected
    var firstItem = reader.ExtractItems(vcfLine).First();
    var json      = firstItem.GetJsonString();

    Assert.Equal("0.4375",   GetAlleleFrequency(json, "allAf"));
    Assert.Equal("0.47882",  GetAlleleFrequency(json, "afrAf"));
    Assert.Equal("0.410663", GetAlleleFrequency(json, "amrAf"));
    Assert.Equal("0.430556", GetAlleleFrequency(json, "easAf"));
    Assert.Equal("0.426441", GetAlleleFrequency(json, "eurAf"));
    Assert.Equal("0.419223", GetAlleleFrequency(json, "sasAf"));

    Assert.DoesNotContain("ancestralAllele", json);
}
/// <summary>
/// Parses a chromosome X deletion line and checks the overall and per-population
/// allele frequencies reported in the first item's JSON string.
/// </summary>
public void PrioritizingSymbolicAllele4Svs()
{
    const string refAllele = "GTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT";

    const string vcfLine = "X 101155257 rs373174489 GTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT G 100 PASS AN=3775;AC=1723;AF=0.456424;AA=;EAS_AN=764;EAS_AC=90;EAS_AF=0.1178;EUR_AN=766;EUR_AC=439;EUR_AF=0.5731;AFR_AN=1003;AFR_AC=839;AFR_AF=0.8365;AMR_AN=524;AMR_AC=180;AMR_AF=0.3435;SAS_AN=718;SAS_AC=175;SAS_AF=0.2437";

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(101155257, refAllele, 'C', _refChromDict);
    var reader           = new OneKGenReader(null, sequenceProvider);

    var extracted = reader.ExtractItems(vcfLine);
    var json      = extracted.First().GetJsonString();

    Assert.Equal("0.456424", GetAlleleFrequency(json, "allAf"));
    Assert.Equal("0.836491", GetAlleleFrequency(json, "afrAf"));
    Assert.Equal("0.343511", GetAlleleFrequency(json, "amrAf"));
    Assert.Equal("0.117801", GetAlleleFrequency(json, "easAf"));
    Assert.Equal("0.573107", GetAlleleFrequency(json, "eurAf"));
    Assert.Equal("0.243733", GetAlleleFrequency(json, "sasAf"));
}
/// <summary>
/// Reads the 1000 Genomes input file and writes its items to an NSA file and a
/// matching index file in the output directory. The output base name is built from
/// the data source version's name and version, with spaces replaced by underscores.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once the writer has consumed all items.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // "<name>_<version>" with blanks turned into underscores
    string rawName     = $"{version.Name}_{version.Version}";
    string outFileName = rawName.Replace(' ', '_');

    string nsaFileName   = outFileName + SaCommon.SaFileSuffix;
    string indexFileName = nsaFileName + SaCommon.IndexSufix;

    using (var oneKGenReader = new OneKGenReader(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider))
    {
        using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
        {
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
            {
                using (var writer = new NsaWriter(
                    new ExtendedBinaryWriter(nsaStream),
                    new ExtendedBinaryWriter(indexStream),
                    version,
                    referenceProvider,
                    SaCommon.OneKgenTag,
                    true,
                    false,
                    SaCommon.SchemaVersion,
                    false))
                {
                    writer.Write(oneKGenReader.GetItems());
                }
            }
        }
    }

    return ExitCodes.Success;
}
/// <summary>
/// Converts a 1000 Genomes input file into TSV form: reads its source version, writes
/// the reader's items through <c>TsvWriterUtilities.WriteSortedItems</c>, and reports
/// the elapsed time. Does nothing when no file name is given.
/// </summary>
/// <param name="fileName">Path of the 1000 Genomes input file; <c>null</c> skips the step.</param>
private void CreateOnekgTsv(string fileName)
{
    // nothing to do when the data source was not supplied
    if (fileName == null)
    {
        return;
    }

    var benchMark = new Benchmark();
    var version   = DataSourceVersionReader.GetSourceVersion(fileName);

    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));

    using (var tsvWriter = new OnekgTsvWriter(version, _outputDirectory, _genomeAssembly, referenceProvider))
    {
        var onekgReader = new OneKGenReader(new FileInfo(fileName), _refNamesDictionary);
        var onekgItems  = onekgReader.GetOneKGenItems();

        TsvWriterUtilities.WriteSortedItems(onekgItems, tsvWriter);
    }

    var elapsed  = benchMark.GetElapsedTime();
    var timeSpan = Benchmark.ToHumanReadable(elapsed);

    TsvWriterUtilities.WriteCompleteInfo("OneKg", version.Version, timeSpan);
}
/// <summary>
/// Round-trip test for a multi-allelic position: dbSNP, 1000 Genomes and EVS records
/// are merged into one supplementary annotation position, written to a temporary
/// supplementary annotation file, read back, and the observed data source versions
/// and allele-specific annotations are compared with the in-memory ones.
/// </summary>
public void MultiAlleleMergeDbSnp1KpEvsSaRw()
{
    // create our expected data source versions
    var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
    var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
    var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

    var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, oneKGenVersion, evsDataSource };

    // create our expected supplementary annotations
    // NOTE(review): the fields of these VCF lines are separated by single spaces as written
    // here; confirm the separators the readers expect.
    const string vcfLine1 = "1 1564952 rs112177324 TG T . . RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
    const string vcfLine2 = "1 1564952 rs112177324 TG TGG,T 100 PASS AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
    const string vcfLine3 = "1 1564952 rs112177324 TG TGG,T . PASS BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

    var sa        = new SupplementaryAnnotationPosition(1564953);
    var saCreator = new SupplementaryPositionCreator(sa);

    // each SetSupplementaryAnnotations call returns an item that is collected and re-applied below
    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItem   = dbsnpReader.ExtractItem(vcfLine1)[0];
    var additionalItems = new List<SupplementaryDataItem>
    {
        dbSnpItem.SetSupplementaryAnnotations(saCreator)
    };

    var oneKGenReader = new OneKGenReader(_renamer);
    var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];
    additionalItems.Add(oneKGenItem.SetSupplementaryAnnotations(saCreator));

    var evsReader    = new EvsReader(_renamer);
    var evsItemsList = evsReader.ExtractItems(vcfLine3);
    foreach (var evsItem in evsItemsList)
    {
        additionalItems.Add(evsItem.SetSupplementaryAnnotations(saCreator));
    }

    foreach (var item in additionalItems)
    {
        item.SetSupplementaryAnnotations(saCreator);
    }

    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    try
    {
        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            var observedDataSourceVersions = reader.Header.DataSourceVersions;

            // check the data source versions (expected value first, per the xUnit convention)
            Assert.Equal(3, observedDataSourceVersions.Count);

            var observedDataSourceVersion = observedDataSourceVersions[0];
            Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
            Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
            Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

            // checking the global alleles
            Assert.Null(sa.GlobalMajorAllele);
            Assert.Null(sa.GlobalMajorAlleleFrequency);
            Assert.Null(sa.GlobalMinorAllele);
            Assert.Null(sa.GlobalMinorAlleleFrequency);

            // extract the three annotations
            var observedAnnotation = reader.GetAnnotation(1564953) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation);

            var oneKgIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg);
            var evsIndex   = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);

            // expected values come from the in-memory position ("iG" = insertion, "1" = deletion key)
            var expectedInsOneKgAllAc = ((OneKGenAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[oneKgIndex]).OneKgAllAc;
            var expectedDelHasOneKg   = sa.AlleleSpecificAnnotations["1"].HasDataSource(DataSourceCommon.DataSource.OneKg);
            var expectedInsEvsAfr     = ((EvsAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[evsIndex]).EvsAfr;
            var expectedInsHasDbSnp   = sa.AlleleSpecificAnnotations["iG"].HasDataSource(DataSourceCommon.DataSource.DbSnp);

            var obsAsaIns = observedAnnotation.AlleleSpecificAnnotations["iG"];
            var obsAsaDel = observedAnnotation.AlleleSpecificAnnotations["1"];

            Assert.Equal(expectedInsOneKgAllAc, ((OneKGenAnnotation)obsAsaIns.Annotations[oneKgIndex]).OneKgAllAc);
            Assert.Equal(expectedDelHasOneKg, obsAsaDel.HasDataSource(DataSourceCommon.DataSource.OneKg));
            Assert.Equal(expectedInsEvsAfr, ((EvsAnnotation)obsAsaIns.Annotations[evsIndex]).EvsAfr);
            Assert.Equal(expectedInsHasDbSnp, obsAsaIns.HasDataSource(DataSourceCommon.DataSource.DbSnp));
        }
    }
    finally
    {
        // remove the temporary files even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}
/// <summary>
/// Round-trip test for a single-alt SNV: dbSNP, 1000 Genomes and EVS records for the
/// same position are added to one supplementary annotation position, written to a
/// temporary supplementary annotation file, read back, and the observed data source
/// versions and "G" allele annotations are compared with the in-memory ones.
/// </summary>
public void ReadAndWriteDbSnp1KgEvs()
{
    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    try
    {
        // create our expected data source versions
        var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
        var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
        var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

        var expectedDataSourceVersions = new List<DataSourceVersion> { dbSnpVersion, oneKGenVersion, evsDataSource };

        // create our expected supplementary annotations
        // NOTE(review): the fields of these VCF lines are separated by single spaces as written
        // here; confirm the separators the readers expect.
        const string vcfLine1 = "1 69428 rs140739101 T G . . RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
        const string vcfLine2 = "1 69428 rs140739101 T G 100 PASS AC=95;AF=0.0189696;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||";
        const string vcfLine3 = "1 69428 rs140739101 T G . PASS BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

        const string altAllele = "G";

        var sa        = new SupplementaryAnnotationPosition(69428);
        var saCreator = new SupplementaryPositionCreator(sa);

        var dbsnpReader = new DbSnpReader(_renamer);
        var dbSnpItem   = dbsnpReader.ExtractItem(vcfLine1)[0];
        dbSnpItem.SetSupplementaryAnnotations(saCreator);

        var oneKGenReader = new OneKGenReader(_renamer);
        var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];
        oneKGenItem.SetSupplementaryAnnotations(saCreator);

        var evsReader = new EvsReader(_renamer);
        var evsItem   = evsReader.ExtractItems(vcfLine3)[0];
        evsItem.SetSupplementaryAnnotations(saCreator);

        // the preceding code has been unit tested in MergeDbSnp1kpEvs()

        // write the supplementary annotation file
        using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
        {
            writer.Write(saCreator, sa.ReferencePosition);
        }

        // read the supplementary annotation file
        using (var reader = new SupplementaryAnnotationReader(randomPath))
        {
            var observedDataSourceVersions = reader.Header.DataSourceVersions;

            // check the data source versions (expected value first, per the xUnit convention)
            Assert.Equal(3, observedDataSourceVersions.Count);

            var observedDataSourceVersion = observedDataSourceVersions[0];
            Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
            Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
            Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

            // extract the three annotations
            var observedAnnotation1 = reader.GetAnnotation(69428) as SupplementaryAnnotationPosition;
            Assert.NotNull(observedAnnotation1);

            var dbSnpIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp);
            var oneKgIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg);
            var evsIndex   = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs);

            // expected annotations: taken from the in-memory position
            var expDbSnp = sa.AlleleSpecificAnnotations[altAllele].Annotations[dbSnpIndex] as DbSnpAnnotation;
            Assert.NotNull(expDbSnp);

            var expOneKg = sa.AlleleSpecificAnnotations[altAllele].Annotations[oneKgIndex] as OneKGenAnnotation;
            Assert.NotNull(expOneKg);

            var expEvs = sa.AlleleSpecificAnnotations[altAllele].Annotations[evsIndex] as EvsAnnotation;
            Assert.NotNull(expEvs);

            // observed annotations: taken from the position read back from disk
            var obsDbSnp = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[dbSnpIndex] as DbSnpAnnotation;
            Assert.NotNull(obsDbSnp);

            var obsOneKg = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[oneKgIndex] as OneKGenAnnotation;
            Assert.NotNull(obsOneKg);

            var obsEvs = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[evsIndex] as EvsAnnotation;
            Assert.NotNull(obsEvs);

            Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);
            Assert.Equal(expEvs.EvsAll, obsEvs.EvsAll);
            Assert.Equal(expOneKg.OneKgAllAc, obsOneKg.OneKgAllAc);
            Assert.Equal(expEvs.EvsCoverage, obsEvs.EvsCoverage);
            Assert.Equal(expEvs.NumEvsSamples, obsEvs.NumEvsSamples);
        }
    }
    finally
    {
        // remove the temporary files even when an assertion above fails
        File.Delete(randomPath);
        File.Delete(randomPath + ".idx");
    }
}
/// <summary>
/// Builds the test class from the shared fixture: stores the fixture's renamer and
/// creates a <c>OneKGenReader</c> around it.
/// </summary>
/// <param name="fixture">Shared fixture exposing the chromosome renamer.</param>
public OneKGenTests(ChromosomeRenamerFixture fixture)
{
    // the renamer is kept for the tests and reused to build the reader
    _renamer = fixture.Renamer;

    _oneKGenReader = new OneKGenReader(_renamer);
}
/// <summary>
/// Prepares creation of the supplementary annotation database: loads the compressed
/// reference, optionally restricts processing to a chromosome white list, and, for every
/// input file that was supplied, records its source version (via AddSourceVersion) and
/// registers an enumerator over its items. Each enumerator is then advanced to its first
/// item; enumerators that yield nothing are dropped.
/// </summary>
/// <param name="compressedReferencePath">Path of the compressed reference sequence file.</param>
/// <param name="nsdBaseFileName">Base file name stored for the output files.</param>
/// <param name="dbSnpFileName">dbSNP input file; skipped when null.</param>
/// <param name="cosmicVcfFile">COSMIC VCF file; only used when <paramref name="cosmicTsvFile"/> is also given.</param>
/// <param name="cosmicTsvFile">COSMIC TSV file; only used when <paramref name="cosmicVcfFile"/> is also given.</param>
/// <param name="clinVarFileName">ClinVar XML file; its items are fully read and sorted up front. Skipped when null.</param>
/// <param name="oneKGenomeAfFileName">1000 Genomes allele frequency file; skipped when null.</param>
/// <param name="evsFileName">EVS input file; skipped when null.</param>
/// <param name="exacFileName">ExAC input file; skipped when null.</param>
/// <param name="customFiles">Custom annotation files; skipped when null.</param>
/// <param name="dgvFileName">DGV input file; skipped when null.</param>
/// <param name="oneKSvFileName">1000 Genomes structural variant file; its source version is only recorded when <paramref name="oneKGenomeAfFileName"/> is null.</param>
/// <param name="clinGenFileName">ClinGen input file; skipped when null.</param>
/// <param name="chrWhiteList">Comma-separated reference names to restrict processing to; null or empty clears the white list.</param>
public CreateSupplementaryDatabase(
    string compressedReferencePath,
    string nsdBaseFileName,
    string dbSnpFileName = null,
    string cosmicVcfFile = null,
    string cosmicTsvFile = null,
    string clinVarFileName = null,
    string oneKGenomeAfFileName = null,
    string evsFileName = null,
    string exacFileName = null,
    List<string> customFiles = null,
    string dgvFileName = null,
    string oneKSvFileName = null,
    string clinGenFileName = null,
    string chrWhiteList = null)
{
    _nsdBaseFileName = nsdBaseFileName;
    _dataSources = new List<DataSourceVersion>();

    _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
    _supplementaryIntervalList = new List<SupplementaryInterval>();

    Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

    // load the reference; the renamer comes from the compressed sequence
    _compressedSequence = new CompressedSequence();
    var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);
    _renamer = _compressedSequence.Renamer;
    _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

    if (!string.IsNullOrEmpty(chrWhiteList))
    {
        Console.WriteLine("Creating SA for the following chromosomes only:");
        foreach (var refSeq in chrWhiteList.Split(','))
        {
            InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
            Console.Write(refSeq + ",");
        }
        Console.WriteLine();
    }
    else
    {
        InputFileParserUtilities.ChromosomeWhiteList = null;
    }

    if (dbSnpFileName != null)
    {
        AddSourceVersion(dbSnpFileName);

        var dbSnpReader = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
        var dbSnpEnumerator = dbSnpReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dbSnpEnumerator);
    }

    // COSMIC needs both of its files
    if (cosmicVcfFile != null && cosmicTsvFile != null)
    {
        AddSourceVersion(cosmicVcfFile);

        var cosmicReader = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
        var cosmicEnumerator = cosmicReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(cosmicEnumerator);
    }

    if (oneKGenomeAfFileName != null)
    {
        AddSourceVersion(oneKGenomeAfFileName);

        var oneKGenReader = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
        var oneKGenEnumerator = oneKGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenEnumerator);
    }

    if (oneKSvFileName != null)
    {
        // the SV file's version is only recorded when the allele frequency file did not already add one
        if (oneKGenomeAfFileName == null)
        {
            AddSourceVersion(oneKSvFileName);
        }

        var oneKGenSvReader = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
        var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
    }

    if (evsFileName != null)
    {
        AddSourceVersion(evsFileName);

        var evsReader = new EvsReader(new FileInfo(evsFileName), _renamer);
        var evsEnumerator = evsReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(evsEnumerator);
    }

    if (exacFileName != null)
    {
        AddSourceVersion(exacFileName);

        var exacReader = new ExacReader(new FileInfo(exacFileName), _renamer);
        var exacEnumerator = exacReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(exacEnumerator);
    }

    if (clinVarFileName != null)
    {
        AddSourceVersion(clinVarFileName);

        // ClinVar items are materialized and sorted before being enumerated
        var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);
        var clinVarList = clinVarReader.ToList();
        clinVarList.Sort();

        Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

        IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinVarEnumerator);
    }

    if (dgvFileName != null)
    {
        AddSourceVersion(dgvFileName);

        var dgvReader = new DgvReader(new FileInfo(dgvFileName), _renamer);
        var dgvEnumerator = dgvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dgvEnumerator);
    }

    if (clinGenFileName != null)
    {
        AddSourceVersion(clinGenFileName);

        var clinGenReader = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
        var clinGenEnumerator = clinGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinGenEnumerator);
    }

    if (customFiles != null)
    {
        foreach (var customFile in customFiles)
        {
            AddSourceVersion(customFile);

            var customReader = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
            var customEnumerator = customReader.GetEnumerator();
            _iSupplementaryDataItemList.Add(customEnumerator);
        }
    }

    // initializing the IEnumerators in the list: advance each one to its first item and drop
    // the empty ones. Walking the list backwards by index lets us remove entries safely;
    // removing inside a foreach over the same list throws InvalidOperationException.
    for (var i = _iSupplementaryDataItemList.Count - 1; i >= 0; i--)
    {
        if (!_iSupplementaryDataItemList[i].MoveNext())
        {
            _iSupplementaryDataItemList.RemoveAt(i);
        }
    }

    _additionalItemsList = new List<SupplementaryDataItem>();
}