Beispiel #1
0
        // Walks ColocatedVcfTestFile through AlleleReader.CloseColocatedLines one co-located group at a time.
        // Verifies the first closed group and its hanging line in detail, then loads every remaining group
        // into a dictionary keyed by "<chromosome>_<position>" and spot-checks a few representative sites.
        public void CloseColocatedGroupVariantTests()
        {
            string incomingHangingVariantLine = null;
            string outgoingHangingVariantLine = null;
            var colocatedAlleles = new Dictionary<string, List<CalledAllele>>();

            var vr = new AlleleReader(ColocatedVcfTestFile);
            var nextClosedLines         = vr.CloseColocatedLines(incomingHangingVariantLine, out outgoingHangingVariantLine);
            var nextClosedGroup         = AlleleReader.VcfLinesToAlleles(nextClosedLines);
            var outgoingHangingVariants = AlleleReader.VcfLineToAlleles(outgoingHangingVariantLine);

            //the algorithm should have grouped the first two, and left the last one hanging.
            //chr1    223906728.G.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    223906728.G   A   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:276,256:532:0.48:20:-100.0000
            //chr1    223906729.G.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000

            Assert.Equal(2, nextClosedGroup.Count);
            Assert.Equal(1, outgoingHangingVariants.Count);

            AssertAlleleSite(nextClosedGroup[0], "chr1", 223906728, "G", ".");
            AssertAlleleSite(nextClosedGroup[1], "chr1", 223906728, "G", "A");
            AssertAlleleSite(outgoingHangingVariants[0], "chr1", 223906729, "G", ".");

            colocatedAlleles.Add(nextClosedGroup[0].Chromosome + "_" + nextClosedGroup[0].ReferencePosition, nextClosedGroup);

            //now read the rest of the file.
            //each call is fed the line left hanging by the previous call; a null hanging line ends the loop.
            while (outgoingHangingVariantLine != null)
            {
                incomingHangingVariantLine = outgoingHangingVariantLine;

                var nextGroupLines = vr.CloseColocatedLines(incomingHangingVariantLine, out outgoingHangingVariantLine);
                var nextGroup      = AlleleReader.VcfLinesToAlleles(nextGroupLines);
                colocatedAlleles.Add(nextGroup[0].Chromosome + "_" + nextGroup[0].ReferencePosition, nextGroup);
            }

            //check that everything loaded correctly
            Assert.Equal(28, colocatedAlleles.Keys.Count);

            //example ref site with one allele
            //chr1	223906730	.	G	.	100	PASS	DP=532	GT:GQ:AD:DP:VF:NL:SB	0/0:100:532:532:0.00:20:-100.0000
            var ex1 = colocatedAlleles["chr1_223906730"];

            Assert.Equal(1, ex1.Count);
            AssertAlleleSite(ex1[0], "chr1", 223906730, "G", ".");

            //example multi allelic site as one vcf line
            //chr1	223906731	.	C	A,T	100	PASS	DP=532	GT:GQ:AD:DP:VF:NL:SB	1/2:100:254,254:532:0.95:20:-100.0000
            var ex2 = colocatedAlleles["chr1_223906731"];

            Assert.Equal(2, ex2.Count);
            AssertAlleleSite(ex2[0], "chr1", 223906731, "C", "A");
            AssertAlleleSite(ex2[1], "chr1", 223906731, "C", "T");

            //example multi allelic site as multiple vcf lines
            //chr1    223906746.G.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    223906746.G   A   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:276,256:532:0.48:20:-100.0000
            //chr1    223906746.G   AC  100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:276,256:532:0.48:20:-100.0000
            //chr1    223906746.GG  AT  100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:276,256:532:0.48:20:-100.0000
            var ex3 = colocatedAlleles["chr1_223906746"];

            Assert.Equal(4, ex3.Count);
            AssertAlleleSite(ex3[0], "chr1", 223906746, "G", ".");
            AssertAlleleSite(ex3[1], "chr1", 223906746, "G", "A");
            AssertAlleleSite(ex3[2], "chr1", 223906746, "G", "AC");
            AssertAlleleSite(ex3[3], "chr1", 223906746, "GG", "AT");

            //check the last vcf lines
            //chrY	87003973	.	T	.	100	PASS	DP=532	GT:GQ:AD:DP:VF:NL:SB	0/0:100:276:532:0.48:20:-100.0000
            //chrY    87003973.ATCTC   A   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:276,256:532:0.48:20:-100.0000
            var ex4 = colocatedAlleles["chrY_87003973"];

            Assert.Equal(2, ex4.Count);
            AssertAlleleSite(ex4[0], "chrY", 87003973, "T", ".");
            AssertAlleleSite(ex4[1], "chrY", 87003973, "ATCTC", "A");
        }

        /// <summary>
        /// Asserts the chromosome, reference allele, alternate allele and reference position of one allele.
        /// Expected values are passed to Assert.Equal first so that failure messages label them correctly.
        /// </summary>
        /// <param name="allele">The allele under test.</param>
        /// <param name="expectedChromosome">Expected value of <c>Chromosome</c>.</param>
        /// <param name="expectedPosition">Expected value of <c>ReferencePosition</c>.</param>
        /// <param name="expectedReference">Expected value of <c>ReferenceAllele</c>.</param>
        /// <param name="expectedAlternate">Expected value of <c>AlternateAllele</c>.</param>
        private static void AssertAlleleSite(CalledAllele allele, string expectedChromosome, int expectedPosition,
                                             string expectedReference, string expectedAlternate)
        {
            // Same assertion order as the original inline checks: chromosome, ref, alt, position.
            Assert.Equal(expectedChromosome, allele.Chromosome);
            Assert.Equal(expectedReference, allele.ReferenceAllele);
            Assert.Equal(expectedAlternate, allele.AlternateAllele);
            Assert.Equal(expectedPosition, allele.ReferencePosition);
        }