コード例 #1
0
 /// <summary>
 /// Captures the renamer, sequence and reader exposed by the shared fixture and
 /// creates a OneKGenReader from the renamer.
 /// </summary>
 /// <param name="fixture">shared fixture supplying the renamer, sequence and reader</param>
 public MergeSaTests(ChromosomeRenamerFixture fixture)
 {
     _renamer = fixture.Renamer;
     _sequence = fixture.Sequence;
     _reader = fixture.Reader;

     // built from the renamer field assigned above
     _oneKGenReader = new OneKGenReader(_renamer);
 }
コード例 #2
0
        // Extracts the items from a VCF line with two alternate alleles (G,T) and checks that
        // two items are produced, each with its own set of population allele frequencies.
        public void MultiAltAlleleTest()
        {
            const string vcfLine =
                "1	15274	rs62636497	A	G,T	100	PASS	AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633";

            var sequenceProvider = ParserTestUtils.GetSequenceProvider(15274, "A", 'C', _refChromDict);
            var reader = new OneKGenReader(null, sequenceProvider);
            var items = reader.ExtractItems(vcfLine).ToList();

            Assert.Equal(2, items.Count);

            var firstJson = items[0].GetJsonString();
            var secondJson = items[1].GetJsonString();

            // frequencies reported by the first item
            Assert.Equal("0.347244", GetAlleleFrequency(firstJson, "allAf"));
            Assert.Equal("0.322995", GetAlleleFrequency(firstJson, "afrAf"));
            Assert.Equal("0.275216", GetAlleleFrequency(firstJson, "amrAf"));
            Assert.Equal("0.481151", GetAlleleFrequency(firstJson, "easAf"));
            Assert.Equal("0.292247", GetAlleleFrequency(firstJson, "eurAf"));
            Assert.Equal("0.349693", GetAlleleFrequency(firstJson, "sasAf"));

            // frequencies reported by the second item
            Assert.Equal("0.640974", GetAlleleFrequency(secondJson, "allAf"));
            Assert.Equal("0.636914", GetAlleleFrequency(secondJson, "afrAf"));
            Assert.Equal("0.720461", GetAlleleFrequency(secondJson, "amrAf"));
            Assert.Equal("0.518849", GetAlleleFrequency(secondJson, "easAf"));
            // double check this one: 0.7077535
            Assert.Equal("0.707753", GetAlleleFrequency(secondJson, "eurAf"));
            Assert.Equal("0.647239", GetAlleleFrequency(secondJson, "sasAf"));
        }
コード例 #3
0
        // NIR-1262: merges dbSNP and 1000 Genomes annotations created for the same position
        // and checks the global major/minor alleles after finalization.
        public void MergeDbSnp1Kg()
        {
            const int position = 825069;

            const string dbSnpLine =
                "1	825069	rs4475692	G	A,C	.	.	RS=4475692;RSPOS=825069;dbSNPBuildID=111;SSR=0;SAO=0;VP=0x050100000005170126000100;WGT=1;VC=SNV;SLO;ASP;VLD;G5A;G5;GNO;KGPhase3;CAF=0.3227,.,0.6773;COMMON=1";
            const string oneKgLine =
                "1	825069	rs4475692	G	C	100	PASS	AC=3392;AF=0.677316;AN=5008;NS=2504;DP=22495;EAS_AF=0.754;AMR_AF=0.5692;AFR_AF=0.6127;EUR_AF=0.7286;SAS_AF=0.7096;AA=g|||;VT=SNP;EAS_AN=1008;EAS_AC=760;EUR_AN=1006;EUR_AC=733;AFR_AN=1322;AFR_AC=810;AMR_AN=694;AMR_AC=395;SAS_AN=978;SAS_AC=694\tGT";

            // dbSNP entries go into the first creator
            var dbSnpReader = new DbSnpReader(_renamer);
            var dbSnpCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));

            foreach (var entry in dbSnpReader.ExtractItem(dbSnpLine))
            {
                entry.SetSupplementaryAnnotations(dbSnpCreator);
            }

            // 1000 Genomes items go into a second, independent creator
            var oneKgCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
            var oneKgReader = new OneKGenReader(_renamer);

            foreach (var item in oneKgReader.ExtractItems(oneKgLine))
            {
                item.SetSupplementaryAnnotations(oneKgCreator);
            }

            // fold the second creator into the first and finalize
            dbSnpCreator.MergeSaCreator(oneKgCreator);
            dbSnpCreator.FinalizePositionalAnnotations();

            Assert.Equal("C", dbSnpCreator.SaPosition.GlobalMajorAllele);
            Assert.Equal("G", dbSnpCreator.SaPosition.GlobalMinorAllele);
        }
コード例 #4
0
ファイル: OneKGenTests.cs プロジェクト: shannonnana/Nirvana
        // Extracts the single item from a 1000 Genomes record and compares its complete JSON
        // string (frequencies, allele numbers and allele counts) against the expected text.
        public void MissingSubPopulationFrequencies()
        {
            const string vcfLine = "1\t10616\trs376342519\tCCGCCGTTGCAAAGGCGCGCCG\tC\t100\tPASS\tAN=5008;AC=4973;AF=0.993011;AA=;EAS_AN=1008;EAS_AC=999;EAS_AF=0.9911;EUR_AN=1006;EUR_AC=1000;EUR_AF=0.994;AFR_AN=1322;AFR_AC=1308;AFR_AF=0.9894;AMR_AN=694;AMR_AC=691;AMR_AF=0.9957;SAS_AN=978;SAS_AC=975;SAS_AF=0.9969";
            const string expectedJson = "\"allAf\":0.993011,\"afrAf\":0.98941,\"amrAf\":0.995677,\"easAf\":0.991071,\"eurAf\":0.994036,\"sasAf\":0.996933,\"allAn\":5008,\"afrAn\":1322,\"amrAn\":694,\"easAn\":1008,\"eurAn\":1006,\"sasAn\":978,\"allAc\":4973,\"afrAc\":1308,\"amrAc\":691,\"easAc\":999,\"eurAc\":1000,\"sasAc\":975";

            var sequenceProvider = ParserTestUtils.GetSequenceProvider(10616, "CCGCCGTTGCAAAGGCGCGCCG", 'C', ChromosomeUtilities.RefNameToChromosome);
            var reader = new OneKGenReader(null, sequenceProvider);
            var extracted = reader.ExtractItems(vcfLine).ToList();

            Assert.Single(extracted);
            Assert.Equal(expectedJson, extracted[0].GetJsonString());
        }
コード例 #5
0
 /// <summary>
 /// Builds the reference-name dictionary (chromosomes 1, 4 and X) used by the tests
 /// and creates a OneKGenReader from it.
 /// </summary>
 public OneKGenTests()
 {
     _refChromDict = new Dictionary<string, IChromosome>
     {
         ["1"] = new Chromosome("chr1", "1", 0),
         ["4"] = new Chromosome("chr4", "4", 3),
         ["X"] = new Chromosome("chrX", "X", 22)
     };

     _oneKGenReader = new OneKGenReader(_refChromDict);
 }
コード例 #6
0
        // Creates EVS, ExAC and 1000 Genomes annotations for the same position in three separate
        // creators, merges them into the first one, and checks that allele "G" carries all three
        // data sources with the expected frequencies.
        public void Merge1KgEvsExac()
        {
            const string evsLine =
                "1	13382	rs191719684	C	G	.	PASS	DBSNP=dbSNP_135;EA_AC=0,8600;AA_AC=17,4389;TAC=17,12989;MAF=0.0,0.3858,0.1307;GTS=GG,GC,CC;EA_GTC=0,0,4300;AA_GTC=0,17,2186;GTC=0,17,6486;DP=54;GL=SAMD11;CP=0.0;CG=1.5;AA=C;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_152486.2:intron;HGVS_CDNA_VAR=NM_152486.2:c.-30C>G;HGVS_PROTEIN_VAR=.;CDS_SIZES=NM_152486.2:2046;GS=.;PH=.;EA_AGE=.;AA_AGE=24.3+/-50.5";
            const string exacLine =
                "1	13382	.	C	G	320.40	VQSRTrancheSNP99.60to99.80	AC=3;AC_AFR=0;AC_AMR=0;AC_Adj=1;AC_EAS=0;AC_FIN=0;AC_Het=1;AC_Hom=0;AC_NFE=0;AC_OTH=0;AC_SAS=1;AF=1.079e-04;AN=27810;AN_AFR=460;AN_AMR=82;AN_Adj=5728;AN_EAS=148;AN_FIN=4;AN_NFE=1400;AN_OTH=60;AN_SAS=3574;BaseQRankSum=-8.880e-01;ClippingRankSum=0.493;DP=86138;FS=0.000;GQ_MEAN=11.35;GQ_STDDEV=12.58;Het_AFR=0;Het_AMR=0;Het_EAS=0;Het_FIN=0;Het_NFE=0;Het_OTH=0;Het_SAS=1;Hom_AFR=0;Hom_AMR=0;Hom_EAS=0;Hom_FIN=0;Hom_NFE=0;Hom_OTH=0;Hom_SAS=0;InbreedingCoeff=-0.0832;MQ=34.49;MQ0=0;MQRankSum=-6.910e-01;NCC=72140;QD=20.03;ReadPosRankSum=-2.073e+00;VQSLOD=-4.106e+00;culprit=MQ;DP_HIST=9135|1821|1658|665|130|135|199|110|41|8|2|1|0|0|0|0|0|0|0|0,1|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=1432|8682|140|118|2625|254|121|17|3|1|10|24|364|94|9|11|0|0|0|0,0|1|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0|0;DOUBLETON_DIST=.;AC_MALE=1;AC_FEMALE=0;AN_MALE=3866;AN_FEMALE=1862;AC_CONSANGUINEOUS=0;AN_CONSANGUINEOUS=684;Hom_CONSANGUINEOUS=0";
            const string oneKgLine =
                "1	13382	rs538606945	C	G	100	PASS	AC=1;AF=0.000199681;AN=5008;NS=2504;DP=28817;EAS_AF=0;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0.001;AA=c|||;VT=SNP";

            // EVS
            var evsCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13382));
            var evsReader = new EvsReader(_renamer);
            var firstEvsItem = evsReader.ExtractItems(evsLine)[0];

            firstEvsItem.SetSupplementaryAnnotations(evsCreator);

            // ExAC
            var exacCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13382));
            var exacReader = new ExacReader(_renamer);
            var firstExacItem = exacReader.ExtractItems(exacLine)[0];

            firstExacItem.SetSupplementaryAnnotations(exacCreator);

            // 1000 Genomes
            var oneKgCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13382));
            var oneKgReader = new OneKGenReader(_renamer);
            var firstOneKgItem = oneKgReader.ExtractItems(oneKgLine)[0];

            firstOneKgItem.SetSupplementaryAnnotations(oneKgCreator);

            // merge everything into the EVS creator
            evsCreator.MergeSaCreator(exacCreator);
            evsCreator.MergeSaCreator(oneKgCreator);

            var alleleAnnotations = evsCreator.SaPosition.AlleleSpecificAnnotations["G"];
            var exacAnnotation = alleleAnnotations.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
            var oneKgAnnotation = alleleAnnotations.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
            var evsAnnotation = alleleAnnotations.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

            Assert.NotNull(exacAnnotation);
            Assert.NotNull(oneKgAnnotation);
            Assert.NotNull(evsAnnotation);

            var oneKgAlleleCount = oneKgAnnotation.OneKgAllAc;
            var oneKgAlleleNumber = oneKgAnnotation.OneKgAllAn;
            var exacAlleleCount = exacAnnotation.ExacAllAc;
            var exacAlleleNumber = exacAnnotation.ExacAllAn;

            Assert.NotNull(oneKgAlleleCount);
            Assert.NotNull(oneKgAlleleNumber);
            Assert.NotNull(exacAlleleCount);
            Assert.NotNull(exacAlleleNumber);

            // frequencies are derived as count / number and rounded with the shared JSON format
            var oneKgFrequency = oneKgAlleleCount.Value / (double)oneKgAlleleNumber.Value;
            Assert.Equal("0.0002", oneKgFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

            Assert.Equal("0.001307", evsAnnotation.EvsAll);

            var exacFrequency = exacAlleleCount.Value / (double)exacAlleleNumber.Value;
            Assert.Equal("0.000175", exacFrequency.ToString(JsonCommon.FrequencyRoundingFormat));
        }
コード例 #7
0
        // Checks the population allele frequencies in the JSON of the first item extracted from
        // the VCF line, and that the JSON contains no "ancestralAllele" entry.
        public void AlleleFrequencyTest()
        {
            const string vcfLine =
                "1	10352	rs555500075	T	TA	100	PAS	AC=2191;AF=0.4375;AN=5008;NS=2504;DP=88915;EAS_AF=0.4306;AMR_AF=0.4107;AFR_AF=0.4788;EUR_AF=0.4264;SAS_AF=0.4192;AA=|||unknown(NO_COVERAGE); VT=INDEL;EAS_AN=1008;EAS_AC=434;EUR_AN=1006;EUR_AC=429;AFR_AN=1322;AFR_AC=633;AMR_AN=694;AMR_AC=285;SAS_AN=978;SAS_AC=410";

            var sequenceProvider = ParserTestUtils.GetSequenceProvider(10352, "T", 'C', _refChromDict);
            var reader = new OneKGenReader(null, sequenceProvider);
            var firstItem = reader.ExtractItems(vcfLine).First();
            var json = firstItem.GetJsonString();

            Assert.Equal("0.4375", GetAlleleFrequency(json, "allAf"));
            Assert.Equal("0.47882", GetAlleleFrequency(json, "afrAf"));
            Assert.Equal("0.410663", GetAlleleFrequency(json, "amrAf"));
            Assert.Equal("0.430556", GetAlleleFrequency(json, "easAf"));
            Assert.Equal("0.426441", GetAlleleFrequency(json, "eurAf"));
            Assert.Equal("0.419223", GetAlleleFrequency(json, "sasAf"));

            Assert.DoesNotContain("ancestralAllele", json);
        }
コード例 #8
0
        // Checks the population allele frequencies in the JSON of the first item extracted from
        // a chromosome X record with a long reference allele.
        public void PrioritizingSymbolicAllele4Svs()
        {
            const string vcfLine =
                "X	101155257	rs373174489	GTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT	G	100	PASS	AN=3775;AC=1723;AF=0.456424;AA=;EAS_AN=764;EAS_AC=90;EAS_AF=0.1178;EUR_AN=766;EUR_AC=439;EUR_AF=0.5731;AFR_AN=1003;AFR_AC=839;AFR_AF=0.8365;AMR_AN=524;AMR_AC=180;AMR_AF=0.3435;SAS_AN=718;SAS_AC=175;SAS_AF=0.2437";

            var sequenceProvider = ParserTestUtils.GetSequenceProvider(101155257, "GTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT", 'C', _refChromDict);
            var reader = new OneKGenReader(null, sequenceProvider);
            var json = reader.ExtractItems(vcfLine).First().GetJsonString();

            Assert.Equal("0.456424", GetAlleleFrequency(json, "allAf"));
            Assert.Equal("0.836491", GetAlleleFrequency(json, "afrAf"));
            Assert.Equal("0.343511", GetAlleleFrequency(json, "amrAf"));
            Assert.Equal("0.117801", GetAlleleFrequency(json, "easAf"));
            Assert.Equal("0.573107", GetAlleleFrequency(json, "eurAf"));
            Assert.Equal("0.243733", GetAlleleFrequency(json, "sasAf"));
        }
コード例 #9
0
        // Reads the 1000 Genomes input file and writes its items to an NSA file plus index file
        // in the output directory. The output file name is "<name>_<version>" taken from the
        // "<input>.version" file, with spaces replaced by underscores.
        private static ExitCodes ProgramExecution()
        {
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var version = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            string outFileName = $"{version.Name}_{version.Version}".Replace(' ', '_');

            // stacked usings: everything is disposed in reverse order once writing completes
            using (var oneKGenReader = new OneKGenReader(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.OneKgenTag, true, false, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(oneKGenReader.GetItems());
            }

            return ExitCodes.Success;
        }
コード例 #10
0
        // Writes the sorted 1000 Genomes items from the given file through an OnekgTsvWriter and
        // reports the elapsed time. Does nothing when fileName is null.
        private void CreateOnekgTsv(string fileName)
        {
            if (fileName == null)
            {
                return;
            }

            var timer = new Benchmark();
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(fileName);
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));

            using (var writer = new OnekgTsvWriter(sourceVersion, _outputDirectory, _genomeAssembly, referenceProvider))
            {
                var reader = new OneKGenReader(new FileInfo(fileName), _refNamesDictionary);
                TsvWriterUtilities.WriteSortedItems(reader.GetOneKGenItems(), writer);
            }

            // the elapsed time is taken after the writer has been disposed
            var elapsed = Benchmark.ToHumanReadable(timer.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("OneKg", sourceVersion.Version, elapsed);
        }
コード例 #11
0
        // Round-trip test: merges dbSNP, 1000 Genomes and EVS items for a multi-allelic position
        // into one creator, writes it to a temporary supplementary annotation file, reads it back
        // and compares selected values with the in-memory position. The temporary file and its
        // ".idx" companion are deleted even when an assertion fails.
        public void MultiAlleleMergeDbSnp1KpEvsSaRw()
        {
            // create our expected data source versions
            var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
            var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

            var expectedDataSourceVersions = new List <DataSourceVersion> {
                dbSnpVersion, oneKGenVersion, evsDataSource
            };

            // create our expected supplementary annotations
            const string vcfLine1 = "1	1564952	rs112177324	TG	T	.	.	RS=112177324;RSPOS=1564953;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x05010008000514013e000200;WGT=1;VC=DIV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.8468,0.1506;COMMON=1";
            const string vcfLine2 = "1	1564952	rs112177324	TG	TGG,T	100	PASS	AC=13,754;AF=0.00259585,0.150559;AN=5008;NS=2504;DP=8657;EAS_AF=0,0.0933;AMR_AF=0.0014,0.2046;AFR_AF=0.0091,0.0182;EUR_AF=0,0.3588;SAS_AF=0,0.136";
            const string vcfLine3 = "1	1564952	rs112177324	TG	TGG,T	.	PASS	BSNP=dbSNP_132;EA_AC=2,3039,4701;AA_AC=44,279,3231;TAC=46,3318,7932;MAF=39.2793,9.0884,29.7805;GTS=A1A1,A1A2,A1R,A2A2,A2R,RR;EA_GTC=0,1,1,707,1624,1538;AA_GTC=4,4,32,41,193,1503;GTC=4,5,33,748,1817,3041;DP=10;GL=MIB2;CP=0.8;CG=-0.0;AA=.;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_080875.2:intron,NM_080875.2:intron,NM_001170689.1:intron,NM_001170689.1:intron,NM_001170688.1:intron,NM_001170688.1:intron,NM_001170687.1:intron,NM_001170687.1:intron,NM_001170686.1:intron,NM_001170686.1:intron;HGVS_CDNA_VAR=NM_080875.2:c.2908+7del1,NM_080875.2:c.2908+6_2908+7insG,NM_001170689.1:c.2187-66del1,NM_001170689.1:c.2187-67_2187-66insG,NM_001170688.1:c.2713+7del1,NM_001170688.1:c.2713+6_2713+7insG,NM_001170687.1:c.2866+7del1,NM_001170687.1:c.2866+6_2866+7insG,NM_001170686.1:c.2896+7del1,NM_001170686.1:c.2896+6_28967insG;HGVS_PROTEIN_VAR=.,.,.,.,.,.,.,.,.,.;CDS_SIZES=NM_080875.2:3213,NM_080875.2:3213,NM_001170689.1:2262,NM_001170689.1:2262,NM_001170688.1:3018,NM_001170688.1:3018,NM_001170687.1:3171,NM_001170687.1:3171,NM_001170686.1:3201,NM_001170686.1:3201;GS=.,.,.,.,.,.,.,.,.,.;PH=.,.,.,.,.,.,.,.,.,.;EA_AGE=.;AA_AGE=.";

            var sa        = new SupplementaryAnnotationPosition(1564953);
            var saCreator = new SupplementaryPositionCreator(sa);

            // SetSupplementaryAnnotations returns an item; those are collected and applied below
            var dbsnpReader     = new DbSnpReader(_renamer);
            var dbSnpItem       = dbsnpReader.ExtractItem(vcfLine1)[0];
            var additionalItems = new List <SupplementaryDataItem>
            {
                dbSnpItem.SetSupplementaryAnnotations(saCreator)
            };

            var oneKGenReader = new OneKGenReader(_renamer);
            var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];

            additionalItems.Add(oneKGenItem.SetSupplementaryAnnotations(saCreator));

            var evsReader    = new EvsReader(_renamer);
            var evsItemsList = evsReader.ExtractItems(vcfLine3);

            foreach (var evsItem in evsItemsList)
            {
                additionalItems.Add(evsItem.SetSupplementaryAnnotations(saCreator));
            }

            // apply the items returned by the calls above to the same creator
            foreach (var item in additionalItems)
            {
                item.SetSupplementaryAnnotations(saCreator);
            }

            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    var observedDataSourceVersions = reader.Header.DataSourceVersions;

                    // check the data source versions (xUnit convention: expected value first)
                    Assert.Equal(3, observedDataSourceVersions.Count);

                    var observedDataSourceVersion = observedDataSourceVersions[0];
                    Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
                    Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
                    Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

                    // checking the global alleles
                    Assert.Null(sa.GlobalMajorAllele);
                    Assert.Null(sa.GlobalMajorAlleleFrequency);
                    Assert.Null(sa.GlobalMinorAllele);
                    Assert.Null(sa.GlobalMinorAlleleFrequency);

                    // extract the three annotations
                    var observedAnnotation = reader.GetAnnotation(1564953) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation);

                    // expected values come from the in-memory position ("iG" and "1" allele keys)
                    var expectedInsOneKgAllAc = ((OneKGenAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc;
                    var expectedDelHasOneKg   = sa.AlleleSpecificAnnotations["1"].HasDataSource(DataSourceCommon.DataSource.OneKg);

                    var expectedInsEvsAfr = ((EvsAnnotation)sa.AlleleSpecificAnnotations["iG"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr;

                    var expectedInsHasDbSnp = sa.AlleleSpecificAnnotations["iG"].HasDataSource(DataSourceCommon.DataSource.DbSnp);

                    var obsAsaIns = observedAnnotation.AlleleSpecificAnnotations["iG"];
                    var obsAsaDel = observedAnnotation.AlleleSpecificAnnotations["1"];

                    Assert.Equal(expectedInsOneKgAllAc, ((OneKGenAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]).OneKgAllAc);
                    Assert.Equal(expectedDelHasOneKg, obsAsaDel.HasDataSource(DataSourceCommon.DataSource.OneKg));

                    Assert.Equal(expectedInsEvsAfr, ((EvsAnnotation)obsAsaIns.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]).EvsAfr);
                    Assert.Equal(expectedInsHasDbSnp, obsAsaIns.HasDataSource(DataSourceCommon.DataSource.DbSnp));
                }
            }
            finally
            {
                // always remove the temporary files, including when an assertion above throws
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
コード例 #12
0
        // Round-trip test: applies dbSNP, 1000 Genomes and EVS items for allele "G" at position
        // 69428 to one creator, writes it to a temporary supplementary annotation file, reads it
        // back and compares the observed annotations with the in-memory ones. The temporary file
        // and its ".idx" companion are deleted even when an assertion fails.
        public void ReadAndWriteDbSnp1KgEvs()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            // create our expected data source versions
            var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var oneKGenVersion = new DataSourceVersion("1000 Genomes Project", "phase3_shapeit2_mvncall_integrated_v5.", DateTime.Parse("2013-05-02").Ticks);
            var evsDataSource  = new DataSourceVersion("EVS", "V2", DateTime.Parse("2013-11-13").Ticks);

            var expectedDataSourceVersions = new List <DataSourceVersion> {
                dbSnpVersion, oneKGenVersion, evsDataSource
            };

            // create our expected supplementary annotations
            const string vcfLine1 = "1	69428	rs140739101	T	G	.	.	RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
            const string vcfLine2 = "1	69428	rs140739101	T	G	100	PASS	AC=95;AF=0.0189696;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||";
            const string vcfLine3 = "1	69428	rs140739101	T	G	.	PASS	BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

            const string altAllele = "G";
            var          sa        = new SupplementaryAnnotationPosition(69428);
            var          saCreator = new SupplementaryPositionCreator(sa);

            var dbsnpReader = new DbSnpReader(_renamer);
            var dbSnpItem   = dbsnpReader.ExtractItem(vcfLine1)[0];

            dbSnpItem.SetSupplementaryAnnotations(saCreator);

            var oneKGenReader = new OneKGenReader(_renamer);
            var oneKGenItem   = oneKGenReader.ExtractItems(vcfLine2)[0];

            oneKGenItem.SetSupplementaryAnnotations(saCreator);

            var evsReader = new EvsReader(_renamer);
            var evsItem   = evsReader.ExtractItems(vcfLine3)[0];

            evsItem.SetSupplementaryAnnotations(saCreator);

            // the preceding code has been unit tested in MergeDbSnp1kpEvs()

            try
            {
                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    var observedDataSourceVersions = reader.Header.DataSourceVersions;

                    // check the data source versions (xUnit convention: expected value first)
                    Assert.Equal(3, observedDataSourceVersions.Count);

                    var observedDataSourceVersion = observedDataSourceVersions[0];
                    Assert.Equal(dbSnpVersion.Name, observedDataSourceVersion.Name);
                    Assert.Equal(dbSnpVersion.Version, observedDataSourceVersion.Version);
                    Assert.Equal(dbSnpVersion.ReleaseDateTicks, observedDataSourceVersion.ReleaseDateTicks);

                    // extract the three annotations
                    var observedAnnotation1 = reader.GetAnnotation(69428) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);

                    // expected annotations come from the in-memory position
                    var expDbSnp =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)]
                        as DbSnpAnnotation;
                    Assert.NotNull(expDbSnp);

                    var expOneKg =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]
                        as OneKGenAnnotation;
                    Assert.NotNull(expOneKg);

                    var expEvs =
                        sa.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]
                        as EvsAnnotation;
                    Assert.NotNull(expEvs);

                    // observed annotations come from the file that was just read back
                    var obsDbSnp = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)]
                                   as DbSnpAnnotation;
                    Assert.NotNull(obsDbSnp);

                    var obsOneKg = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)]
                                   as OneKGenAnnotation;
                    Assert.NotNull(obsOneKg);

                    var obsEvs = observedAnnotation1.AlleleSpecificAnnotations[altAllele].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)]
                                 as EvsAnnotation;
                    Assert.NotNull(obsEvs);

                    Assert.Equal(expDbSnp.DbSnp, obsDbSnp.DbSnp);

                    Assert.Equal(expEvs.EvsAll, obsEvs.EvsAll);
                    Assert.Equal(expOneKg.OneKgAllAc, obsOneKg.OneKgAllAc);

                    Assert.Equal(expEvs.EvsCoverage, obsEvs.EvsCoverage);
                    Assert.Equal(expEvs.NumEvsSamples, obsEvs.NumEvsSamples);
                }
            }
            finally
            {
                // always remove the temporary files, including when an assertion above throws
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
コード例 #13
0
ファイル: OneKGenTests.cs プロジェクト: YuJiang01/Nirvana
 /// <summary>
 /// Takes the renamer from the shared fixture and creates a OneKGenReader from it.
 /// </summary>
 /// <param name="fixture">shared fixture supplying the renamer</param>
 public OneKGenTests(ChromosomeRenamerFixture fixture)
 {
     _renamer = fixture.Renamer;

     // built from the renamer field assigned above
     _oneKGenReader = new OneKGenReader(_renamer);
 }
コード例 #14
0
        /// <summary>
        /// Sets up the supplementary database builder: opens the compressed reference, applies the
        /// optional chromosome whitelist and registers one item enumerator per supplied data source.
        /// Every registered enumerator is then advanced to its first item; enumerators that yield
        /// no items are dropped from the list.
        /// </summary>
        /// <param name="compressedReferencePath">path of the compressed reference, opened via FileUtilities.GetReadStream</param>
        /// <param name="nsdBaseFileName">stored in _nsdBaseFileName for later use</param>
        /// <param name="dbSnpFileName">dbSNP input; skipped when null</param>
        /// <param name="cosmicVcfFile">COSMIC VCF input; used only when cosmicTsvFile is also non-null</param>
        /// <param name="cosmicTsvFile">COSMIC TSV input; used only when cosmicVcfFile is also non-null</param>
        /// <param name="clinVarFileName">ClinVar XML input; its items are read into a list and sorted; skipped when null</param>
        /// <param name="oneKGenomeAfFileName">1000 Genomes allele frequency input; skipped when null</param>
        /// <param name="evsFileName">EVS input; skipped when null</param>
        /// <param name="exacFileName">ExAC input; skipped when null</param>
        /// <param name="customFiles">custom annotation inputs, one reader per file; skipped when null</param>
        /// <param name="dgvFileName">DGV input; skipped when null</param>
        /// <param name="oneKSvFileName">1000 Genomes SV input; its source version is added only when oneKGenomeAfFileName is null</param>
        /// <param name="clinGenFileName">ClinGen input; skipped when null</param>
        /// <param name="chrWhiteList">comma-separated reference names to restrict processing to; null or empty resets the whitelist to null</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List <string> customFiles   = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List <DataSourceVersion>();

            _iSupplementaryDataItemList = new List <IEnumerator <SupplementaryDataItem> >();
            _supplementaryIntervalList  = new List <SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            // the whitelist is shared static state on InputFileParserUtilities
            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // COSMIC needs both the VCF and the TSV file
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the SV file only contributes a source version when the AF file did not
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // ClinVar items are materialized and sorted before being enumerated
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

                IEnumerator <ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);
                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // Advance every enumerator to its first item and drop the ones that are empty.
            // Iterate over a snapshot: removing from the list while enumerating it directly
            // invalidates the enumerator and throws InvalidOperationException.
            foreach (var iDataEnumerator in _iSupplementaryDataItemList.ToArray())
            {
                if (!iDataEnumerator.MoveNext())
                {
                    _iSupplementaryDataItemList.Remove(iDataEnumerator);
                }
            }

            _additionalItemsList = new List <SupplementaryDataItem>();
        }