/// <summary>
/// Reads the co-located VCF test file group by group through
/// <c>AlleleReader.CloseColocatedLines</c> and checks that alleles sharing a
/// chromosome and position are returned together, while the first line of the
/// following position is handed back as the "hanging" line.
/// </summary>
public void CloseColocatedGroupVariantTests()
{
    string incomingHangingVariantLine = null;
    string outgoingHangingVariantLine = null;
    var colocatedAlleles = new Dictionary<string, List<CalledAllele>>();

    // NOTE(review): if AlleleReader implements IDisposable, this should be wrapped in a using - confirm.
    var vr = new AlleleReader(ColocatedVcfTestFile);

    var nextClosedLines = vr.CloseColocatedLines(incomingHangingVariantLine, out outgoingHangingVariantLine);
    var nextClosedGroup = AlleleReader.VcfLinesToAlleles(nextClosedLines);
    var outgoingHangingVariants = AlleleReader.VcfLineToAlleles(outgoingHangingVariantLine);

    // The algorithm should have grouped the first two lines and left the third one hanging.
    //chr1 223906728 . G . 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/0:100:532:532:0.00:20:-100.0000
    //chr1 223906728 . G A 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/1:100:276,256:532:0.48:20:-100.0000
    //chr1 223906729 . G . 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/0:100:532:532:0.00:20:-100.0000
    Assert.Equal(2, nextClosedGroup.Count);
    Assert.Equal(1, outgoingHangingVariants.Count);

    AssertAlleleCoordinates(nextClosedGroup[0], "chr1", 223906728, "G", ".");
    AssertAlleleCoordinates(nextClosedGroup[1], "chr1", 223906728, "G", "A");
    AssertAlleleCoordinates(outgoingHangingVariants[0], "chr1", 223906729, "G", ".");

    colocatedAlleles.Add(nextClosedGroup[0].Chromosome + "_" + nextClosedGroup[0].ReferencePosition, nextClosedGroup);

    // Now read the rest of the file. Each call is seeded with the line the previous
    // call left hanging; a null hanging line ends the loop.
    incomingHangingVariantLine = outgoingHangingVariantLine;
    while (incomingHangingVariantLine != null)
    {
        var nextGroupLines = vr.CloseColocatedLines(incomingHangingVariantLine, out outgoingHangingVariantLine);
        var nextGroup = AlleleReader.VcfLinesToAlleles(nextGroupLines);
        colocatedAlleles.Add(nextGroup[0].Chromosome + "_" + nextGroup[0].ReferencePosition, nextGroup);

        incomingHangingVariantLine = outgoingHangingVariantLine;
    }

    // Check that everything loaded correctly: one dictionary entry per distinct site.
    Assert.Equal(28, colocatedAlleles.Count);

    // Example ref site with one allele.
    //chr1 223906730 . G . 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/0:100:532:532:0.00:20:-100.0000
    var ex1 = colocatedAlleles["chr1_223906730"];
    Assert.Equal(1, ex1.Count);
    AssertAlleleCoordinates(ex1[0], "chr1", 223906730, "G", ".");

    // Example multi-allelic site given as one vcf line.
    //chr1 223906731 . C A,T 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 1/2:100:254,254:532:0.95:20:-100.0000
    var ex2 = colocatedAlleles["chr1_223906731"];
    Assert.Equal(2, ex2.Count);
    AssertAlleleCoordinates(ex2[0], "chr1", 223906731, "C", "A");
    AssertAlleleCoordinates(ex2[1], "chr1", 223906731, "C", "T");

    // Example multi-allelic site given as multiple vcf lines.
    //chr1 223906746 . G . 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/0:100:532:532:0.00:20:-100.0000
    //chr1 223906746 . G A 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/1:100:276,256:532:0.48:20:-100.0000
    //chr1 223906746 . G AC 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/1:100:276,256:532:0.48:20:-100.0000
    //chr1 223906746 . GG AT 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/1:100:276,256:532:0.48:20:-100.0000
    var ex3 = colocatedAlleles["chr1_223906746"];
    Assert.Equal(4, ex3.Count);
    AssertAlleleCoordinates(ex3[0], "chr1", 223906746, "G", ".");
    AssertAlleleCoordinates(ex3[1], "chr1", 223906746, "G", "A");
    AssertAlleleCoordinates(ex3[2], "chr1", 223906746, "G", "AC");
    AssertAlleleCoordinates(ex3[3], "chr1", 223906746, "GG", "AT");

    // Check the last vcf lines.
    //chrY 87003973 . T . 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/0:100:276:532:0.48:20:-100.0000
    //chrY 87003973 . ATCTC A 100 PASS DP=532 GT:GQ:AD:DP:VF:NL:SB 0/1:100:276,256:532:0.48:20:-100.0000
    var ex4 = colocatedAlleles["chrY_87003973"];
    Assert.Equal(2, ex4.Count);
    AssertAlleleCoordinates(ex4[0], "chrY", 87003973, "T", ".");
    AssertAlleleCoordinates(ex4[1], "chrY", 87003973, "ATCTC", "A");
}

/// <summary>
/// Asserts the chromosome, reference position, reference allele and alternate
/// allele of a single parsed allele, passing the expected value first so that
/// failure messages label expected and actual correctly.
/// </summary>
/// <param name="allele">The parsed allele under test.</param>
/// <param name="expectedChromosome">Expected value of <c>Chromosome</c>.</param>
/// <param name="expectedPosition">Expected value of <c>ReferencePosition</c>.</param>
/// <param name="expectedReference">Expected value of <c>ReferenceAllele</c>.</param>
/// <param name="expectedAlternate">Expected value of <c>AlternateAllele</c>.</param>
private static void AssertAlleleCoordinates(
    CalledAllele allele,
    string expectedChromosome,
    int expectedPosition,
    string expectedReference,
    string expectedAlternate)
{
    Assert.Equal(expectedChromosome, allele.Chromosome);
    Assert.Equal(expectedReference, allele.ReferenceAllele);
    Assert.Equal(expectedAlternate, allele.AlternateAllele);
    Assert.Equal(expectedPosition, allele.ReferencePosition);
}