Example #1
0
        /// <summary>
        /// Parses two ExAC records for position 13528 into separate creators, merges the second
        /// creator into the first, and expects the merged ExAC annotation for allele "T" to be
        /// flagged as conflicting.
        /// </summary>
        public void MergeConflictingExacItems()
        {
            // first record: alternate alleles G and T
            const string firstRecord = "1	13528	.	C	G,T	1771.54	VQSRTrancheSNP99.60to99.80	AC=21,11;AC_AFR=12,0;AC_AMR=1,0;AC_Adj=13,9;AC_EAS=0,0;AC_FIN=0,0;AC_Het=13,9,0;AC_Hom=0,0;AC_NFE=0,2;AC_OTH=0,0;AC_SAS=0,7;AF=6.036e-04,3.162e-04;AN=34792;AN_AFR=390;AN_AMR=116;AN_Adj=10426;AN_EAS=150;AN_FIN=8;AN_NFE=2614;AN_OTH=116;AN_SAS=7032;BaseQRankSum=1.23;ClippingRankSum=0.056;DP=144988;FS=0.000;GQ_MEAN=14.54;GQ_STDDEV=16.53;Het_AFR=12,0,0;Het_AMR=1,0,0;Het_EAS=0,0,0;Het_FIN=0,0,0;Het_NFE=0,2,0;Het_OTH=0,0,0;Het_SAS=0,7,0;Hom_AFR=0,0;Hom_AMR=0,0;Hom_EAS=0,0;Hom_FIN=0,0;Hom_NFE=0,0;Hom_OTH=0,0;Hom_SAS=0,0;InbreedingCoeff=0.0557;MQ=31.08;MQ0=0;MQRankSum=-5.410e-01;NCC=67387;QD=1.91;ReadPosRankSum=0.206;VQSLOD=-2.705e+00;culprit=MQ;DP_HIST=10573|1503|705|1265|2477|613|167|52|18|11|8|3|0|0|1|0|0|0|0|0,2|6|2|1|4|0|3|1|0|0|2|0|0|0|0|0|0|0|0|0,1|0|0|0|1|1|3|0|1|1|1|0|0|0|1|0|0|0|0|0;GQ_HIST=342|11195|83|56|3154|517|367|60|12|4|5|7|1373|180|15|16|1|0|1|8,0|0|1|0|1|0|3|1|0|1|2|0|1|2|0|1|1|0|1|6,0|1|0|0|1|1|0|0|1|0|0|1|1|1|1|0|0|0|0|2";

            // second record: same position, alternate allele T only
            const string secondRecord =
                "1	13528	.	C	T	334.33	VQSRTrancheSNP99.60to99.80	AC=2;AC_AFR=0;AC_AMR=0;AC_Adj=2;AC_EAS=0;AC_FIN=0;AC_Het=2;AC_Hom=0;AC_NFE=0;AC_OTH=0;AC_SAS=2;AF=5.957e-05;AN=33576;AN_AFR=392;AN_AMR=114;AN_Adj=10200;AN_EAS=146;AN_FIN=6;AN_NFE=2556;AN_OTH=110;AN_SAS=6876;BaseQRankSum=-1.988e+00;ClippingRankSum=0.525;DP=142450;FS=2.634;GQ_MEAN=14.30;GQ_STDDEV=15.90;Het_AFR=0;Het_AMR=0;Het_EAS=0;Het_FIN=0;Het_NFE=0;Het_OTH=0;Het_SAS=2;Hom_AFR=0;Hom_AMR=0;Hom_EAS=0;Hom_FIN=0;Hom_NFE=0;Hom_OTH=0;Hom_SAS=0;InbreedingCoeff=-0.0753;MQ=31.78;MQ0=0;MQRankSum=0.578;NCC=68350;QD=5.31;ReadPosRankSum=-5.730e-01;VQSLOD=-3.582e+00;culprit=MQ;DP_HIST=10108|1417|742|1238|2324|682|184|56|20|11|4|2|0|0|0|0|0|0|0|0,0|0|0|1|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0;GQ_HIST=335|10726|91|50|3215|542|410|67|10|3|1|6|1138|163|14|15|0|0|0|2,0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|2;DOUBLETON_DIST=0.028857408061;AC_MALE=1;AC_FEMALE=1;AN_MALE=7294;AN_FEMALE=2906;AC_CONSANGUINEOUS=0;AN_CONSANGUINEOUS=1360;Hom_CONSANGUINEOUS=0;";

            var mergeTarget = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13528));
            var mergeSource = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(13528));

            var reader = new ExacReader(_renamer);

            // items of the first record populate the merge target
            var parsedItems = reader.ExtractItems(firstRecord);
            foreach (var parsedItem in parsedItems)
            {
                parsedItem.SetSupplementaryAnnotations(mergeTarget);
            }

            // the same list is emptied and refilled with the second record's items
            parsedItems.Clear();
            parsedItems.AddRange(reader.ExtractItems(secondRecord));

            // null entries are skipped by the null-conditional call
            foreach (var parsedItem in parsedItems)
            {
                parsedItem?.SetSupplementaryAnnotations(mergeSource);
            }

            mergeTarget.MergeSaCreator(mergeSource);

            // look up the ExAC slot of allele "T" on the merged position
            var annotationsForT = mergeTarget.SaPosition.AlleleSpecificAnnotations["T"].Annotations;
            var exacSlot        = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac);
            var mergedExac      = annotationsForT[exacSlot] as ExacAnnotation;

            Assert.NotNull(mergedExac);
            Assert.True(mergedExac.HasConflicts);
        }
Example #2
0
        /// <summary>
        /// Extracts the two items of a multi-allelic ExAC record (alternate alleles G and T) and checks
        /// each item's allele number, coverage and formatted allele frequency.
        /// </summary>
        public void ExacExtraction()
        {
            const string record = "1	13528	.	C	G,T	1771.54	VQSRTrancheSNP99.60to99.80	AC=21,11;AC_AFR=12,0;AC_AMR=1,0;AC_Adj=13,9;AC_EAS=0,0;AC_FIN=0,0;AC_Het=13,9,0;AC_Hom=0,0;AC_NFE=0,2;AC_OTH=0,0;AC_SAS=0,7;AF=6.036e-04,3.162e-04;AN=34792;AN_AFR=390;AN_AMR=116;AN_Adj=10426;AN_EAS=150;AN_FIN=8;AN_NFE=2614;AN_OTH=116;AN_SAS=7032;BaseQRankSum=1.23;ClippingRankSum=0.056;DP=144988;FS=0.000;GQ_MEAN=14.54;GQ_STDDEV=16.53;Het_AFR=12,0,0;Het_AMR=1,0,0;Het_EAS=0,0,0;Het_FIN=0,0,0;Het_NFE=0,2,0;Het_OTH=0,0,0;Het_SAS=0,7,0;Hom_AFR=0,0;Hom_AMR=0,0;Hom_EAS=0,0;Hom_FIN=0,0;Hom_NFE=0,0;Hom_OTH=0,0;Hom_SAS=0,0;InbreedingCoeff=0.0557;MQ=31.08;MQ0=0;MQRankSum=-5.410e-01;NCC=67387;QD=1.91;ReadPosRankSum=0.206;VQSLOD=-2.705e+00;culprit=MQ;DP_HIST=10573|1503|705|1265|2477|613|167|52|18|11|8|3|0|0|1|0|0|0|0|0,2|6|2|1|4|0|3|1|0|0|2|0|0|0|0|0|0|0|0|0,1|0|0|0|1|1|3|0|1|1|1|0|0|0|1|0|0|0|0|0;GQ_HIST=342|11195|83|56|3154|517|367|60|12|4|5|7|1373|180|15|16|1|0|1|8,0|0|1|0|1|0|3|1|0|1|2|0|1|2|0|1|1|0|1|6,0|1|0|0|1|1|0|0|1|0|0|1|1|1|1|0|0|0|0|2";

            var reader = new ExacReader(null, _refChromDict);
            var items  = reader.ExtractItems(record);

            // first extracted item
            var firstItem   = items[0];
            var firstNumber = firstItem.AllAlleleNumber;
            var firstCount  = firstItem.AllAlleleCount;

            // second extracted item
            var secondItem   = items[1];
            var secondNumber = secondItem.AllAlleleNumber;
            var secondCount  = secondItem.AllAlleleCount;

            // all four values must be present before their .Value is read below
            Assert.NotNull(firstNumber);
            Assert.NotNull(firstCount);
            Assert.NotNull(secondNumber);
            Assert.NotNull(secondCount);

            Assert.Equal(10426, firstNumber.Value);
            Assert.Equal(28, firstItem.Coverage);
            var firstFrequency = firstCount.Value / (double)firstNumber.Value;
            Assert.Equal("0.001247", firstFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

            Assert.Equal(10426, secondNumber.Value);
            Assert.Equal(28, secondItem.Coverage);
            var secondFrequency = secondCount.Value / (double)secondNumber.Value;
            Assert.Equal("0.000863", secondFrequency.ToString(JsonCommon.FrequencyRoundingFormat));
        }
Example #3
0
        /// <summary>
        /// Builds one creator each from an EVS, an ExAC and a 1000 Genomes record at position 13382,
        /// merges the ExAC and 1000 Genomes creators into the EVS one, and checks that allele "G"
        /// then carries all three annotations with the expected frequencies.
        /// </summary>
        public void Merge1KgEvsExac()
        {
            const string evsRecord =
                "1	13382	rs191719684	C	G	.	PASS	DBSNP=dbSNP_135;EA_AC=0,8600;AA_AC=17,4389;TAC=17,12989;MAF=0.0,0.3858,0.1307;GTS=GG,GC,CC;EA_GTC=0,0,4300;AA_GTC=0,17,2186;GTC=0,17,6486;DP=54;GL=SAMD11;CP=0.0;CG=1.5;AA=C;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_152486.2:intron;HGVS_CDNA_VAR=NM_152486.2:c.-30C>G;HGVS_PROTEIN_VAR=.;CDS_SIZES=NM_152486.2:2046;GS=.;PH=.;EA_AGE=.;AA_AGE=24.3+/-50.5";
            const string exacRecord =
                "1	13382	.	C	G	320.40	VQSRTrancheSNP99.60to99.80	AC=3;AC_AFR=0;AC_AMR=0;AC_Adj=1;AC_EAS=0;AC_FIN=0;AC_Het=1;AC_Hom=0;AC_NFE=0;AC_OTH=0;AC_SAS=1;AF=1.079e-04;AN=27810;AN_AFR=460;AN_AMR=82;AN_Adj=5728;AN_EAS=148;AN_FIN=4;AN_NFE=1400;AN_OTH=60;AN_SAS=3574;BaseQRankSum=-8.880e-01;ClippingRankSum=0.493;DP=86138;FS=0.000;GQ_MEAN=11.35;GQ_STDDEV=12.58;Het_AFR=0;Het_AMR=0;Het_EAS=0;Het_FIN=0;Het_NFE=0;Het_OTH=0;Het_SAS=1;Hom_AFR=0;Hom_AMR=0;Hom_EAS=0;Hom_FIN=0;Hom_NFE=0;Hom_OTH=0;Hom_SAS=0;InbreedingCoeff=-0.0832;MQ=34.49;MQ0=0;MQRankSum=-6.910e-01;NCC=72140;QD=20.03;ReadPosRankSum=-2.073e+00;VQSLOD=-4.106e+00;culprit=MQ;DP_HIST=9135|1821|1658|665|130|135|199|110|41|8|2|1|0|0|0|0|0|0|0|0,1|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=1432|8682|140|118|2625|254|121|17|3|1|10|24|364|94|9|11|0|0|0|0,0|1|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0|0;DOUBLETON_DIST=.;AC_MALE=1;AC_FEMALE=0;AN_MALE=3866;AN_FEMALE=1862;AC_CONSANGUINEOUS=0;AN_CONSANGUINEOUS=684;Hom_CONSANGUINEOUS=0";
            const string oneKgRecord =
                "1	13382	rs538606945	C	G	100	PASS	AC=1;AF=0.000199681;AN=5008;NS=2504;DP=28817;EAS_AF=0;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0.001;AA=c|||;VT=SNP";

            // all three records describe the same position
            const int position = 13382;

            // EVS goes into the creator that later receives the merges
            var mergedCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
            var firstEvsItem  = new EvsReader(_renamer).ExtractItems(evsRecord)[0];
            firstEvsItem.SetSupplementaryAnnotations(mergedCreator);

            // ExAC gets a creator of its own
            var exacCreator   = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
            var firstExacItem = new ExacReader(_renamer).ExtractItems(exacRecord)[0];
            firstExacItem.SetSupplementaryAnnotations(exacCreator);

            // and so does 1000 Genomes
            var oneKgCreator   = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(position));
            var firstOneKgItem = new OneKGenReader(_renamer).ExtractItems(oneKgRecord)[0];
            firstOneKgItem.SetSupplementaryAnnotations(oneKgCreator);

            mergedCreator.MergeSaCreator(exacCreator);
            mergedCreator.MergeSaCreator(oneKgCreator);

            // pull the per-source annotations of allele "G" off the merged position
            var alleleG         = mergedCreator.SaPosition.AlleleSpecificAnnotations["G"];
            var exacAnnotation  = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
            var oneKgAnnotation = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.OneKg)] as OneKGenAnnotation;
            var evsAnnotation   = alleleG.Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Evs)] as EvsAnnotation;

            Assert.NotNull(exacAnnotation);
            Assert.NotNull(oneKgAnnotation);
            Assert.NotNull(evsAnnotation);

            var oneKgCount  = oneKgAnnotation.OneKgAllAc;
            var oneKgNumber = oneKgAnnotation.OneKgAllAn;
            var exacCount   = exacAnnotation.ExacAllAc;
            var exacNumber  = exacAnnotation.ExacAllAn;

            // counts and numbers must be present before their .Value is read below
            Assert.NotNull(oneKgCount);
            Assert.NotNull(oneKgNumber);
            Assert.NotNull(exacCount);
            Assert.NotNull(exacNumber);

            var oneKgFrequency = oneKgCount.Value / (double)oneKgNumber.Value;
            Assert.Equal("0.0002", oneKgFrequency.ToString(JsonCommon.FrequencyRoundingFormat));

            Assert.Equal("0.001307", evsAnnotation.EvsAll);

            var exacFrequency = exacCount.Value / (double)exacNumber.Value;
            Assert.Equal("0.000175", exacFrequency.ToString(JsonCommon.FrequencyRoundingFormat));
        }
        /// <summary>
        /// Reads the ExAC items from <paramref name="fileName"/> and hands them, together with an
        /// <c>ExacTsvWriter</c>, to <c>TsvWriterUtilities.WriteSortedItems</c>; afterwards passes the
        /// source version and elapsed time to <c>TsvWriterUtilities.WriteCompleteInfo</c>.
        /// </summary>
        /// <param name="fileName">Path of the ExAC input file; a null or empty value makes the method return immediately.</param>
        private void CreateExacTsv(string fileName)
        {
            if (string.IsNullOrEmpty(fileName)) return;

            // the timer is created before any work so the reported time covers version lookup and writing
            var timer         = new Benchmark();
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(fileName);

            var referenceStream  = FileUtilities.GetReadStream(_compressedReferencePath);
            var sequenceProvider = new ReferenceSequenceProvider(referenceStream);

            using (var tsvWriter = new ExacTsvWriter(sourceVersion, _outputDirectory, _genomeAssembly, sequenceProvider))
            {
                var reader = new ExacReader(new FileInfo(fileName), _refNamesDictionary);
                var items  = reader.GetExacItems();
                TsvWriterUtilities.WriteSortedItems(items, tsvWriter);
            }

            var elapsed = Benchmark.ToHumanReadable(timer.GetElapsedTime());
            TsvWriterUtilities.WriteCompleteInfo("EXaC", sourceVersion.Version, elapsed);
        }
Example #5
0
        /// <summary>
        /// Writes the ExAC annotations of a multi-allelic indel record to a temporary supplementary
        /// annotation file, reads position 3121453 back, and checks that the ExAC values observed for
        /// alleles "iA" and "1" match the ones that were written.
        /// </summary>
        public void ReadAndWriteExacWithMultipleAlleles()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            // the temporary file and its index are removed even when an assertion fails
            try
            {
                // create our expected data source versions
                var exacVersion = new DataSourceVersion("ExAC", "0.3.1", DateTime.Parse("2016-03-16").Ticks);

                var expectedDataSourceVersions = new List<DataSourceVersion> {
                    exacVersion
                };

                // create our expected supplementary annotations: note AN_adj is modified in this
                const string vcfline =
                    "19	3121452	.	TA	T,TAA	17262.47	AC_Adj0_Filter	AC=6,9;AC_AFR=0,0;AC_AMR=0,0;AC_Adj=0,0;AC_EAS=0,0;AC_FIN=0,0;AC_Het=0,0,0;AC_Hom=0,0;AC_NFE=0,0;AC_OTH=0,0;AC_SAS=0,0;AF=4.587e-03,6.881e-03;AN=1308;AN_AFR=0;AN_AMR=0;AN_Adj=3;AN_EAS=0;AN_FIN=0;AN_NFE=0;AN_OTH=0;AN_SAS=0;BaseQRankSum=0.437;DP=2838";

                var sa        = new SupplementaryAnnotationPosition(3121453);
                var saCreator = new SupplementaryPositionCreator(sa);

                var exacReader      = new ExacReader(_renamer);
                var additionalItems = new List<SupplementaryDataItem>();

                // first pass: collect whatever item each SetSupplementaryAnnotations call hands back
                foreach (var exacItem in exacReader.ExtractItems(vcfline))
                {
                    var currentItem = exacItem.SetSupplementaryAnnotations(saCreator);
                    additionalItems.Add(currentItem);
                }

                var currentSa        = new SupplementaryAnnotationPosition(3121453);
                var currentSaCreator = new SupplementaryPositionCreator(currentSa);

                // second pass: the returned items populate the position that is actually written
                foreach (var exacItem in additionalItems)
                {
                    exacItem.SetSupplementaryAnnotations(currentSaCreator);
                }

                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr19", expectedDataSourceVersions))
                {
                    writer.Write(currentSaCreator, currentSa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation for the written position
                    var observedAnnotation1 = reader.GetAnnotation(3121453) as SupplementaryAnnotationPosition;

                    Assert.NotNull(observedAnnotation1);

                    var exacIndex = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac);

                    // we want to make sure we are reading the values we have written, for both alleles
                    foreach (var allele in new[] { "iA", "1" })
                    {
                        var expExac = currentSa.AlleleSpecificAnnotations[allele].Annotations[exacIndex] as ExacAnnotation;
                        Assert.NotNull(expExac);

                        var obsExac = observedAnnotation1.AlleleSpecificAnnotations[allele].Annotations[exacIndex] as ExacAnnotation;
                        Assert.NotNull(obsExac);

                        Assert.Equal(expExac.ExacAllAn, obsExac.ExacAllAn);

                        Assert.Equal(expExac.ExacCoverage, obsExac.ExacCoverage);
                        Assert.Equal(expExac.ExacAllAc, obsExac.ExacAllAc);
                        Assert.NotNull(obsExac.ExacAllAc);
                        Assert.Null(obsExac.ExacFinAc);
                        Assert.Null(obsExac.ExacFinAn);
                    }
                }
            }
            finally
            {
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Example #6
0
        /// <summary>
        /// Writes dbSNP and ExAC annotations for position 48010488 to a temporary supplementary
        /// annotation file, reads the position back, and checks that the dbSNP annotation of allele "A"
        /// and the ExAC annotations of alleles "A" and "C" match what was written.
        /// </summary>
        public void ReadWriteExacDbsnp()
        {
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            // the temporary file and its index are removed even when an assertion fails
            try
            {
                // create our expected data source versions
                var dbSnpVersion  = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
                var cosmicVersion = new DataSourceVersion("COSMIC", "GRCh37_v71", DateTime.Parse("2014-10-21").Ticks);

                var expectedDataSourceVersions = new List<DataSourceVersion> {
                    dbSnpVersion, cosmicVersion
                };

                // create our expected supplementary annotations
                const string vcfLine1 = "2	48010488	rs1042821	G	A	.	.	RS=1042821;RSPOS=48010488;RV;dbSNPBuildID=86;SSR=0;SAO=1;VP=0x050168420a05150136100100;GENEINFO=MSH6:2956;WGT=1;VC=SNV;PM;PMC;SLO;NSM;REF;U5;R5;ASP;VLD;G5;GNO;KGPhase1;KGPhase3;LSD;CAF=0.7991,0.2009;COMMON=1";
                const string vcfLine2 =
                    "2	48010488	rs1042821	G	A,C	14068898.15	PASS	AC=21019,1;AC_AFR=1700,0;AC_AMR=1015,1;AC_Adj=19510,1;AC_EAS=1973,0;AC_FIN=743,0;AC_Het=15722,1,0;AC_Hom=1894,0;AC_NFE=10593,0;AC_OTH=147,0;AC_SAS=3339,0;AF=0.178,8.487e-06;AN=117830;AN_AFR=6388;AN_AMR=9014;AN_Adj=91130;AN_EAS=6792;AN_FIN=5078;AN_NFE=48404;AN_OTH=664;AN_SAS=14790;BaseQRankSum=-4.850e-01;ClippingRankSum=-1.400e-01;DB;DP=1206681;FS=0.000;GQ_MEAN=129.86;GQ_STDDEV=221.88;Het_AFR=1322,0,0;Het_AMR=931,1,0;Het_EAS=1511,0,0;Het_FIN=665,0,0;Het_NFE=8585,0,0;Het_OTH=111,0,0;Het_SAS=2597,0,0;Hom_AFR=189,0;Hom_AMR=42,0;Hom_EAS=231,0;Hom_FIN=39,0;Hom_NFE=1004,0;Hom_OTH=18,0;Hom_SAS=371,0;InbreedingCoeff=0.0376;MQ=60.00;MQ0=0;MQRankSum=0.00;NCC=3737;POSITIVE_TRAIN_SITE;QD=17.46;ReadPosRankSum=0.181;VQSLOD=5.87;culprit=MQ;DP_HIST=3051|9435|11318|5521|9711|11342|4131|1270|615|404|328|266|264|262|196|186|126|115|97|277,133|968|2180|3402|3564|2815|1772|954|551|389|321|263|261|261|196|186|126|115|97|277,0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;GQ_HIST=949|2966|347|492|15135|1435|1335|854|421|526|590|416|13672|1951|445|462|255|174|211|16279,24|79|81|124|135|96|110|118|97|180|228|137|182|191|126|171|180|151|192|16229,0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1";

                var sa        = new SupplementaryAnnotationPosition(48010488);
                var saCreator = new SupplementaryPositionCreator(sa);

                var dbsnpReader = new DbSnpReader(_renamer);
                var dbSnpItem1  = dbsnpReader.ExtractItem(vcfLine1)[0];

                dbSnpItem1.SetSupplementaryAnnotations(saCreator);

                var exacReader = new ExacReader(_renamer);

                foreach (var exacItem in exacReader.ExtractItems(vcfLine2))
                {
                    exacItem.SetSupplementaryAnnotations(saCreator);
                }

                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr2", expectedDataSourceVersions))
                {
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                // read the supplementary annotation file
                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // extract the annotation for the written position
                    var observedAnnotation1 = reader.GetAnnotation(48010488) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation1);

                    // dbSNP annotation of allele "A"
                    var expDbSnpA =
                        sa.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
                    var obsDbSnpA =
                        observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;
                    Assert.NotNull(expDbSnpA);
                    Assert.NotNull(obsDbSnpA);

                    // we want to make sure we are reading the values we have written
                    Assert.Equal(expDbSnpA.DbSnp, obsDbSnpA.DbSnp);

                    // ExAC annotation of allele "A"
                    var expExacA =
                        sa.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
                    var obsExacA =
                        observedAnnotation1.AlleleSpecificAnnotations["A"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;

                    Assert.NotNull(expExacA);
                    Assert.NotNull(obsExacA);

                    Assert.Equal(expExacA.ExacAllAn, obsExacA.ExacAllAn);
                    Assert.Equal(expExacA.ExacCoverage, obsExacA.ExacCoverage);
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAllAc), Convert.ToDouble(obsExacA.ExacAllAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAfrAc), Convert.ToDouble(obsExacA.ExacAfrAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacAmrAc), Convert.ToDouble(obsExacA.ExacAmrAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacEasAc), Convert.ToDouble(obsExacA.ExacEasAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacFinAc), Convert.ToDouble(obsExacA.ExacFinAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacNfeAc), Convert.ToDouble(obsExacA.ExacNfeAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacOthAc), Convert.ToDouble(obsExacA.ExacOthAc));
                    Assert.Equal(Convert.ToDouble(expExacA.ExacSasAc), Convert.ToDouble(obsExacA.ExacSasAc));

                    // ExAC annotation of allele "C"
                    var expExacC =
                        sa.AlleleSpecificAnnotations["C"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;
                    var obsExacC =
                        observedAnnotation1.AlleleSpecificAnnotations["C"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.Exac)] as ExacAnnotation;

                    Assert.NotNull(expExacC);
                    Assert.NotNull(obsExacC);

                    Assert.Equal(expExacC.ExacCoverage, obsExacC.ExacCoverage);
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAllAc), Convert.ToDouble(obsExacC.ExacAllAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAfrAc), Convert.ToDouble(obsExacC.ExacAfrAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacAmrAc), Convert.ToDouble(obsExacC.ExacAmrAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacEasAc), Convert.ToDouble(obsExacC.ExacEasAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacFinAc), Convert.ToDouble(obsExacC.ExacFinAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacNfeAc), Convert.ToDouble(obsExacC.ExacNfeAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacOthAc), Convert.ToDouble(obsExacC.ExacOthAc));
                    Assert.Equal(Convert.ToDouble(expExacC.ExacSasAc), Convert.ToDouble(obsExacC.ExacSasAc));
                }
            }
            finally
            {
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Example #7
0
        /// <summary>
        /// Sets up the readers for every supplied data source, records each source's version, and
        /// advances every source enumerator to its first item. Sources with no items are dropped.
        /// </summary>
        /// <param name="compressedReferencePath">Path of the compressed reference; opened via <c>FileUtilities.GetReadStream</c>.</param>
        /// <param name="nsdBaseFileName">Stored as the base file name for the output.</param>
        /// <param name="dbSnpFileName">dbSNP input; skipped when null.</param>
        /// <param name="cosmicVcfFile">COSMIC VCF input; only used when <paramref name="cosmicTsvFile"/> is also non-null.</param>
        /// <param name="cosmicTsvFile">COSMIC TSV input; only used when <paramref name="cosmicVcfFile"/> is also non-null.</param>
        /// <param name="clinVarFileName">ClinVar XML input; read completely and sorted before use. Skipped when null.</param>
        /// <param name="oneKGenomeAfFileName">1000 Genomes allele-frequency input; skipped when null.</param>
        /// <param name="evsFileName">EVS input; skipped when null.</param>
        /// <param name="exacFileName">ExAC input; skipped when null.</param>
        /// <param name="customFiles">Custom annotation inputs, one reader per file; skipped when null.</param>
        /// <param name="dgvFileName">DGV input; skipped when null.</param>
        /// <param name="oneKSvFileName">1000 Genomes SV input; its source version is only recorded when <paramref name="oneKGenomeAfFileName"/> is null.</param>
        /// <param name="clinGenFileName">ClinGen input; skipped when null.</param>
        /// <param name="chrWhiteList">Comma-separated reference names to restrict processing to; null or empty clears the white list.</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List<string> customFiles    = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List<DataSourceVersion>();

            _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
            _supplementaryIntervalList  = new List<SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    // NOTE(review): assumes ChromosomeWhiteList is non-null here; the else branch below
                    // sets the same static member to null — confirm it is re-created elsewhere
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // COSMIC needs both of its files; a single one is ignored
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the SV file's version is only recorded when the allele-frequency file did not already add one
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // ClinVar items are materialized and sorted before they are enumerated
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

                IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);
                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // initializing the IEnumerators in the list: advance each to its first item and drop the empty ones.
            // Iterate over a snapshot, because removing from the list while enumerating it directly
            // throws InvalidOperationException on the next iteration.
            foreach (var iDataEnumerator in _iSupplementaryDataItemList.ToArray())
            {
                if (!iDataEnumerator.MoveNext())
                {
                    _iSupplementaryDataItemList.Remove(iDataEnumerator);
                }
            }

            _additionalItemsList = new List<SupplementaryDataItem>();
        }