Пример #1
0
        public void MergeDbSnp1Kg()
        {
            // NIR-1262: merge a multi-allelic dbSNP record with a 1000 Genomes record
            // at the same position and check the resulting global major/minor alleles.
            const string dbSnpLine =
                "1	825069	rs4475692	G	A,C	.	.	RS=4475692;RSPOS=825069;dbSNPBuildID=111;SSR=0;SAO=0;VP=0x050100000005170126000100;WGT=1;VC=SNV;SLO;ASP;VLD;G5A;G5;GNO;KGPhase3;CAF=0.3227,.,0.6773;COMMON=1";
            const string oneKgLine =
                "1	825069	rs4475692	G	C	100	PASS	AC=3392;AF=0.677316;AN=5008;NS=2504;DP=22495;EAS_AF=0.754;AMR_AF=0.5692;AFR_AF=0.6127;EUR_AF=0.7286;SAS_AF=0.7096;AA=g|||;VT=SNP;EAS_AN=1008;EAS_AC=760;EUR_AN=1006;EUR_AC=733;AFR_AN=1322;AFR_AC=810;AMR_AN=694;AMR_AC=395;SAS_AN=978;SAS_AC=694\tGT";

            // every dbSNP item from the first line goes into its own creator
            var dbSnpItems   = new DbSnpReader(_renamer).ExtractItem(dbSnpLine);
            var dbSnpCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(825069));

            foreach (var item in dbSnpItems)
            {
                item.SetSupplementaryAnnotations(dbSnpCreator);
            }

            // the 1000 Genomes items are collected in a second, independent creator
            var oneKgCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(825069));
            var oneKgItems   = new OneKGenReader(_renamer).ExtractItems(oneKgLine);

            foreach (var item in oneKgItems)
            {
                item.SetSupplementaryAnnotations(oneKgCreator);
            }

            // fold the second creator into the first and finalize the merged position
            dbSnpCreator.MergeSaCreator(oneKgCreator);
            dbSnpCreator.FinalizePositionalAnnotations();

            var mergedPosition = dbSnpCreator.SaPosition;

            Assert.Equal("C", mergedPosition.GlobalMajorAllele);
            Assert.Equal("G", mergedPosition.GlobalMinorAllele);
        }
Пример #2
0
        public void MergeDbSnp1KpEvsRefMinor()
        {
            // Applies a dbSNP item, a 1000 Genomes item and an EVS item for the same variant
            // to a single creator, then checks the dbSNP id stored for allele G and that the
            // position is flagged as reference-minor.
            const string vcfLine1 = "1	69428	rs140739101	T	G	.	.	RS=140739101;RSPOS=69428;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050200000a05140026000100;WGT=1;VC=SNV;S3D;NSM;REF;ASP;VLD;KGPhase3;CAF=0.981,0.01897;COMMON=1";
            // vcf line is modified
            const string vcfLine2 = "1	69428	rs140739101	T	G	100	PASS	AC=4956;AF=0.989617;AN=5008;NS=2504;DP=17611;EAS_AF=0.003;AMR_AF=0.036;AFR_AF=0.0015;EUR_AF=0.0497;SAS_AF=0.0153;AA=.|||;VT=SNP;EX_TARGET;EAS_AN=1008;EAS_AC=3;EUR_AN=1006;EUR_AC=50;AFR_AN=1322;AFR_AC=2;AMR_AN=694;AMR_AC=25;SAS_AN=978;SAS_AC=15";
            const string vcfLine3 = "1	69428	rs140739101	T	G	.	PASS	BSNP=dbSNP_134;EA_AC=313,6535;AA_AC=14,3808;TAC=327,10343;MAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;EA_GTC=92,129,3203;AA_GTC=1,12,1898;GTC=93,141,5101;DP=110;GL=OR4F5;CP=1.0;CG=0.9;AA=T;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_001005484.1:missense;HGVS_CDNA_VAR=NM_001005484.1:c.338T>G;HGVS_PROTEIN_VAR=NM_001005484.1:p.(F113C);CDS_SIZES=NM_001005484.1:918;GS=205;PH=probably-damaging:0.999;EA_AGE=.;AA_AGE=.";

            var sa = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(69428));

            // dbSNP
            var dbsnpReader = new DbSnpReader(_renamer);
            var dbSnpItem   = dbsnpReader.ExtractItem(vcfLine1)[0];

            dbSnpItem.SetSupplementaryAnnotations(sa);

            // 1000 Genomes (uses the reader shared by the test class)
            var oneKGenItem = _oneKGenReader.ExtractItems(vcfLine2)[0];

            oneKGenItem.SetSupplementaryAnnotations(sa);

            // EVS
            var evsReader = new EvsReader(_renamer);
            var evsItem   = evsReader.ExtractItems(vcfLine3)[0];

            evsItem.SetSupplementaryAnnotations(sa);
            sa.FinalizePositionalAnnotations();

            var dbSnp = sa.SaPosition.AlleleSpecificAnnotations["G"].Annotations[DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp)] as DbSnpAnnotation;

            // the 'as' cast yields null if the slot holds a different annotation type
            Assert.NotNull(dbSnp);

            Assert.Equal(new List<long> { 140739101 }, dbSnp.DbSnp);

            // Assert.True states the intent directly and gives a clearer failure message
            // than comparing against a literal 'true'
            Assert.True(sa.SaPosition.IsRefMinorAllele);
        }
Пример #3
0
        public void MergeSnvAndDeletion()
        {
            // NIR-906: an SNV and a deletion whose RSPOS is the same position are merged;
            // the global alleles asserted below carry the frequencies from the SNV's CAF field.
            const string snvLine =
                "1	862389	rs6693546	A	G	.	.	RS=6693546;RSPOS=862389;dbSNPBuildID=116;SSR=0;SAO=0;VP=0x05010008000515013e000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;G5;GNO;KGPhase1;KGPhase3;CAF=0.3171,0.6829;COMMON=1";
            const string deletionLine = "1	862388	rs534606253	GA	G	.	.	RS=534606253;RSPOS=862389;dbSNPBuildID=142;SSR=0;SAO=0;VP=0x050000080005040024000200;WGT=1;VC=DIV;INT;ASP;VLD;KGPhase3;CAF=0.996,0.003994;COMMON=1";

            var reader       = new DbSnpReader(_renamer);
            var snvItem      = reader.ExtractItem(snvLine)[0];
            var deletionItem = reader.ExtractItem(deletionLine)[0];

            // SNV goes straight into the primary creator
            var snvCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(862389));

            snvItem.SetSupplementaryAnnotations(snvCreator);

            // The deletion's first call hands back a follow-up item, which is then applied
            // to the same creator (presumably the deletion re-expressed at its reduced
            // position - confirm against DbSnpItem).
            var deletionCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(862389));
            var followUpItem    = deletionItem.SetSupplementaryAnnotations(deletionCreator);

            followUpItem.SetSupplementaryAnnotations(deletionCreator);

            snvCreator.MergeSaCreator(deletionCreator);
            snvCreator.FinalizePositionalAnnotations();

            var merged = snvCreator.SaPosition;

            Assert.Equal("G", merged.GlobalMajorAllele);
            Assert.Equal("0.6829", merged.GlobalMajorAlleleFrequency);
            Assert.Equal("A", merged.GlobalMinorAllele);
            Assert.Equal("0.3171", merged.GlobalMinorAlleleFrequency);
        }
Пример #4
0
        public void DeletionAndSnvMerge()
        {
            // NIR-906: same scenario as merging an SNV with a deletion, but here the deletion
            // is loaded into the primary creator and the SNV into the one merged in.
            const string deletionLine =
                "2	193187631	rs774176075	TGTTG	T	.	.	RS=774176075;RSPOS=193187632;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000000005000002000200;WGT=1;VC=DIV;ASP";
            const string snvLine = "2	193187632	rs2592266	G	T	.	.	RS=2592266;RSPOS=193187632;dbSNPBuildID=100;SSR=0;SAO=0;VP=0x050000000005150026000100;WGT=1;VC=SNV;ASP;VLD;G5;KGPhase3;CAF=0.01937,0.9806;COMMON=1";

            var reader       = new DbSnpReader(_renamer);
            var deletionItem = reader.ExtractItem(deletionLine)[0];
            var snvItem      = reader.ExtractItem(snvLine)[0];

            // The deletion's first call returns a follow-up item that is applied to the
            // same creator (presumably the reduced form of the deletion - confirm).
            var deletionCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(193187632));
            var followUpItem    = deletionItem.SetSupplementaryAnnotations(deletionCreator);

            followUpItem.SetSupplementaryAnnotations(deletionCreator);

            var snvCreator = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(193187632));

            snvItem.SetSupplementaryAnnotations(snvCreator);

            deletionCreator.MergeSaCreator(snvCreator);
            deletionCreator.FinalizePositionalAnnotations();

            var merged = deletionCreator.SaPosition;

            // expected values match the SNV record's CAF field (0.01937 for G, 0.9806 for T)
            Assert.Equal("T", merged.GlobalMajorAllele);
            Assert.Equal("0.9806", merged.GlobalMajorAlleleFrequency);
            Assert.Equal("G", merged.GlobalMinorAllele);
            Assert.Equal("0.01937", merged.GlobalMinorAlleleFrequency);
        }
Пример #5
0
        public void RwDbsnpGlobalAlleles()
        {
            // NIR-1262: round-trips two overlapping dbSNP records through the supplementary
            // annotation writer and reader and checks the global major/minor alleles read back.
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            // create our expected data source versions
            // (the date is built from its components so the tick count does not depend on
            // the culture the test happens to run under)
            var dbSnpVersion = new DataSourceVersion("dbSNP", "147", new DateTime(2016, 7, 26).Ticks);

            var expectedDataSourceVersions = new List<DataSourceVersion> {
                dbSnpVersion
            };

            const string vcfLine1 =
                "2	141724543	rs112783784	A	C,T	.	.	RS=112783784;RSPOS=141724543;dbSNPBuildID=132;SSR=0;SAO=0;VP=0x050100080015140136000100;WGT=1;VC=SNV;SLO;INT;OTH;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.04113,0.9105,0.04832;COMMON=1";

            const string vcfLine2 =
                "2	141724543	rs4300776	A	C	.	.	RS=4300776;RSPOS=141724543;dbSNPBuildID=111;SSR=0;SAO=0;VP=0x050100080015000102000100;WGT=1;VC=SNV;SLO;INT;OTH;ASP;GNO;CAF=0.04113,0.9105;COMMON=1";

            var sa        = new SupplementaryAnnotationPosition(141724543);
            var saCreator = new SupplementaryPositionCreator(sa);

            var dbsnpReader = new DbSnpReader(_renamer);

            foreach (var dbSnpItem in dbsnpReader.ExtractItem(vcfLine1))
            {
                dbSnpItem.SetSupplementaryAnnotations(saCreator);
            }

            foreach (var dbSnpItem in dbsnpReader.ExtractItem(vcfLine2))
            {
                dbSnpItem.SetSupplementaryAnnotations(saCreator);
            }

            try
            {
                // write the supplementary annotation file
                using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
                {
                    saCreator.FinalizePositionalAnnotations();
                    writer.Write(saCreator, sa.ReferencePosition);
                }

                using (var reader = new SupplementaryAnnotationReader(randomPath))
                {
                    // read back the annotation stored at the written position
                    var observedAnnotation = reader.GetAnnotation(141724543) as SupplementaryAnnotationPosition;
                    Assert.NotNull(observedAnnotation);

                    Assert.Equal("C", observedAnnotation.GlobalMajorAllele);
                    Assert.Equal("0.9105", observedAnnotation.GlobalMajorAlleleFrequency);

                    Assert.Equal("T", observedAnnotation.GlobalMinorAllele);
                    Assert.Equal("0.04832", observedAnnotation.GlobalMinorAlleleFrequency);
                }
            }
            finally
            {
                // always remove the temp files, even when an assertion above fails;
                // File.Delete is a no-op for a file that was never created
                File.Delete(randomPath);
                File.Delete(randomPath + ".idx");
            }
        }
Пример #6
0
        public void RefAlleleMinor()
        {
            // Applies every 1000 Genomes item parsed from the class-level VcfLine2 to one
            // creator and expects the finalized position to be flagged as reference-minor.
            var sa = new SupplementaryPositionCreator();

            foreach (var oneKitem in _oneKGenReader.ExtractItems(VcfLine2))
            {
                oneKitem.SetSupplementaryAnnotations(sa);
            }

            sa.FinalizePositionalAnnotations();

            // Assert.True avoids the swapped expected/actual arguments of the former
            // Assert.Equal(actual, true) and matches the other ref-minor tests
            Assert.True(sa.IsRefMinor());
        }
Пример #7
0
        public void MissingRefMinor()
        {
            // Multi-allelic 1000 Genomes record: after all of its items are applied,
            // the position must be reported as reference-minor.
            const string multiAllelicLine =
                "1	15274	rs62636497	A	G,T	100	PASS	AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC";

            var creator = new SupplementaryPositionCreator();
            var items   = _oneKGenReader.ExtractItems(multiAllelicLine);

            foreach (var item in items)
            {
                item.SetSupplementaryAnnotations(creator);
            }

            creator.FinalizePositionalAnnotations();

            var isRefMinor = creator.IsRefMinor();
            Assert.True(isRefMinor);
        }
Пример #8
0
        public void RefGlobalMajor()
        {
            // NIR-942: the record's CAF field is 0.4,0.4,0.2; the test expects the reference
            // allele A to be reported as global major and G as global minor.
            const string tiedFrequencyLine =
                "1	1242707	rs2274262	A	G,T	.	.	RS=2274262;RSPOS=1242707;RV;dbSNPBuildID=100;SSR=0;SAO=0;VP=0x0501004a000507013e000100;WGT=1;VC=SNV;SLO;U5;INT;R5;ASP;VLD;G5A;G5;GNO;KGPhase1;KGPhase3;CAF=0.4,0.4,0.2;COMMON=1";

            // only the first item extracted from the line is applied
            var firstItem = new DbSnpReader(_renamer).ExtractItem(tiedFrequencyLine)[0];

            var position = new SupplementaryAnnotationPosition(1242707);
            var creator  = new SupplementaryPositionCreator(position);

            firstItem.SetSupplementaryAnnotations(creator);
            creator.FinalizePositionalAnnotations();

            Assert.Equal("A", position.GlobalMajorAllele);
            Assert.Equal("G", position.GlobalMinorAllele);
        }
Пример #9
0
        public void DisregardZeroFreq()
        {
            // The record's CAF field is 0,1: the allele with frequency 0 must not be
            // reported as the global minor allele.
            const string vcfLine =
                "1	241369	rs11490246	C	T	.	.	RS=11490246;RSPOS=241369;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x050100000005000126000100;WGT=1;VC=SNV;SLO;ASP;GNO;KGPhase3;CAF=0,1;COMMON=0";
            var dbsnpReader = new DbSnpReader(_renamer);
            var dbSnpEntry  = dbsnpReader.ExtractItem(vcfLine)[0];

            // use the record's own coordinate (241369); the previous value 828 belonged to a
            // different test record and did not match this variant
            var sa        = new SupplementaryAnnotationPosition(241369);
            var saCreator = new SupplementaryPositionCreator(sa);

            dbSnpEntry.SetSupplementaryAnnotations(saCreator);

            saCreator.FinalizePositionalAnnotations();

            Assert.Equal("T", sa.GlobalMajorAllele);
            Assert.Equal("1", sa.GlobalMajorAlleleFrequency);
            Assert.Null(sa.GlobalMinorAllele);
            Assert.Null(sa.GlobalMinorAlleleFrequency);
        }
Пример #10
0
        public void MultiEntryMixedVariant()
        {
            // Two 1000 Genomes entries (class-level VcfLine5 and VcfLine6) are loaded into
            // separate creators, merged, and the result must be reference-minor.
            var primaryCreator = new SupplementaryPositionCreator();

            var firstItem  = _oneKGenReader.ExtractItems(VcfLine5)[0];
            var secondItem = _oneKGenReader.ExtractItems(VcfLine6)[0];

            firstItem.SetSupplementaryAnnotations(primaryCreator);

            // additional items are ignored since they cannot be SNVs
            var secondaryCreator = new SupplementaryPositionCreator();

            secondItem.SetSupplementaryAnnotations(secondaryCreator);

            primaryCreator.MergeSaCreator(secondaryCreator);
            primaryCreator.FinalizePositionalAnnotations();

            var isRefMinor = primaryCreator.IsRefMinor();
            Assert.True(isRefMinor);
        }
Пример #11
0
        public void MissingDbsnpId()
        {
            // refactorSA. Annotation for C is missing in the database. have to debug that.
            // Every alternate allele (A, C, T) of the record must end up with the record's rsID.
            const string triAllelicLine =
                "X	21505833	rs12395602	G	A,C,T	.	.	RS=12395602;RSPOS=21505833;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x05010008000505051f000101;WGT=1;VC=SNV;SLO;INT;ASP;VLD;G5;HD;GNO;KGPhase1";

            var items = new DbSnpReader(_renamer).ExtractItem(triAllelicLine);

            var position = new SupplementaryAnnotationPosition(21505833);
            var creator  = new SupplementaryPositionCreator(position);

            foreach (var item in items)
            {
                item.SetSupplementaryAnnotations(creator);
            }

            creator.FinalizePositionalAnnotations();

            // slot of the dbSNP annotation inside each allele's annotation array
            var dbSnpSlot = DataSourceCommon.GetIndex(DataSourceCommon.DataSource.DbSnp);

            var annotationA = position.AlleleSpecificAnnotations["A"].Annotations[dbSnpSlot] as DbSnpAnnotation;
            Assert.NotNull(annotationA);

            var annotationC = position.AlleleSpecificAnnotations["C"].Annotations[dbSnpSlot] as DbSnpAnnotation;
            Assert.NotNull(annotationC);

            var annotationT = position.AlleleSpecificAnnotations["T"].Annotations[dbSnpSlot] as DbSnpAnnotation;
            Assert.NotNull(annotationT);

            Assert.Equal(12395602, annotationA.DbSnp[0]);
            Assert.Equal(12395602, annotationC.DbSnp[0]);
            Assert.Equal(12395602, annotationT.DbSnp[0]);
        }
Пример #12
0
        public void NoMinorAllele1()
        {
            // The record's CAF field is "0.7424,.": only the first frequency is given, so the
            // test expects T as global major and no global minor allele at all.
            const string missingAltFrequencyLine =
                "17	828	rs62053745	T	C	.	.	RS=62053745;RSPOS=828;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x050100080005140136000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.7424,.;COMMON=1";

            var firstItem = new DbSnpReader(_renamer).ExtractItem(missingAltFrequencyLine)[0];

            var position = new SupplementaryAnnotationPosition(828);
            var creator  = new SupplementaryPositionCreator(position);

            firstItem.SetSupplementaryAnnotations(creator);
            creator.FinalizePositionalAnnotations();

            // major allele and its frequency are populated
            Assert.Equal("T", position.GlobalMajorAllele);
            Assert.Equal("0.7424", position.GlobalMajorAlleleFrequency);

            // minor allele fields stay unset
            Assert.Null(position.GlobalMinorAllele);
            Assert.Null(position.GlobalMinorAlleleFrequency);
        }
Пример #13
0
        public void SpuriousRefMinor()
        {
            // NIR-903: a multi-allelic 1000 Genomes record mixing an indel and an SNP.
            // The first extracted item must not produce a ref-minor call, the second must.
            const string mixedLine =
                "2	190634102	rs531674661;rs1225108	A	AC,C	100	PASS	AC=18,4905;AF=0.00359425,0.979433;AN=5008;NS=2504;DP=14024;EAS_AF=0.001,0.997;AMR_AF=0.0043,0.9899;AFR_AF=0,0.9402;EUR_AF=0.004,0.996;SAS_AF=0.0102,0.9898;VT=SNP,INDEL;MULTI_ALLELIC";

            var firstCreator  = new SupplementaryPositionCreator();
            var secondCreator = new SupplementaryPositionCreator();

            var items = _oneKGenReader.ExtractItems(mixedLine);

            // each item gets its own creator so the two outcomes can be checked separately
            items[0].SetSupplementaryAnnotations(firstCreator);
            items[1].SetSupplementaryAnnotations(secondCreator);

            firstCreator.FinalizePositionalAnnotations();
            secondCreator.FinalizePositionalAnnotations();

            Assert.False(firstCreator.IsRefMinor());
            Assert.True(secondCreator.IsRefMinor());
        }
Пример #14
0
        /// <summary>
        /// Writes the annotations held by <paramref name="spCreator"/> to the current database
        /// file and registers the entry in the index. Nothing is written or indexed when the
        /// creator reports itself as empty.
        /// </summary>
        /// <param name="spCreator">creator whose annotation is written; must not be null</param>
        /// <param name="referencePos">reference position under which the entry is indexed</param>
        /// <param name="finalizePositinalAnnotation">
        /// when true (the default), FinalizePositionalAnnotations is called on the creator
        /// before anything else is done with it
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// thrown when <paramref name="spCreator"/> is null
        /// </exception>
        public void Write(SupplementaryPositionCreator spCreator, int referencePos, bool finalizePositinalAnnotation = true)
        {
            // fail with a clear message instead of a NullReferenceException further down
            if (spCreator == null)
            {
                throw new System.ArgumentNullException("spCreator");
            }

            if (finalizePositinalAnnotation)
            {
                spCreator.FinalizePositionalAnnotations();
            }

            if (spCreator.IsEmpty())
            {
                return;
            }

            // add this entry to the index
            // NOTE(review): position and offset are narrowed to uint here; a negative position
            // or an offset beyond uint.MaxValue would wrap silently - confirm the index format.
            var currentOffset = _stream.Position;

            // evaluate the flag once so the index entry and the counter can never disagree
            var isRefMinor = spCreator.IsRefMinor();

            _index.Add((uint)referencePos, (uint)currentOffset, isRefMinor);
            if (isRefMinor)
            {
                RefMinorCount++;
            }

            spCreator.WriteAnnotation(_writer);
        }