コード例 #1
0
        public void CallThroughAnEmptyNbhd()
        {
            // Scenario: a neighborhood built from two adjacent chr1 sites (123 and 124) to which
            // no phased variants have been added. Running the caller over it is expected to
            // produce no called variants and one homozygous-reference call at each site.
            var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var siteAt123 = new VariantSite(alleleAt123);
            var siteAt124 = new VariantSite(alleleAt124);

            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
            neighborhood.SetRangeOfInterest();

            var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs = neighborhood.CalledRefs;

            // Nothing was staged as a phased variant, so nothing should be called as one.
            Assert.Equal(0, calledVariants.Count);

            // Both original sites come back as reference calls keyed by their own coordinate.
            Assert.Equal(2, calledRefs.Count);
            foreach (var position in new[] { 123, 124 })
            {
                Assert.Equal(Genotype.HomozygousRef, calledRefs[position].Genotype);
                Assert.Equal(position, calledRefs[position].Coordinate);
            }
        }
コード例 #2
0
        /// <summary>
        /// Builds <paramref name="numVeadGroups"/> vead groups, each made of
        /// <paramref name="numVeads"/> two-site veads created from a fixed string table.
        /// </summary>
        /// <param name="numVeads">Number of veads placed in every group.</param>
        /// <param name="numVeadGroups">Number of groups to return.</param>
        /// <param name="useAlternateVariantSites">
        /// When true the veads use the {C,C},{G,A} table; otherwise the {A,T},{G,C} table.
        /// </param>
        /// <param name="prefix">Text prepended to every generated read name.</param>
        /// <returns>The generated groups, in creation order.</returns>
        public static List<VeadGroup> GetSampleVeadGroups(int numVeads = 4, int numVeadGroups = 1, bool useAlternateVariantSites = false, string prefix = "")
        {
            var groups = new List<VeadGroup>();

            for (var groupIndex = 0; groupIndex < numVeadGroups; groupIndex++)
            {
                var members = new List<Vead>();

                for (var readIndex = 0; readIndex < numVeads; readIndex++)
                {
                    // NOTE(review): the name suffix is the PRODUCT of the two indices, so names
                    // repeat (every read in group 0 is "<prefix>r0"). Kept as-is; confirm intent.
                    var readName = prefix + "r" + groupIndex * readIndex;

                    // A fresh table is allocated for every vead, exactly as before.
                    var siteTable = useAlternateVariantSites
                        ? new[,] { { "C", "C" }, { "G", "A" } }
                        : new[,] { { "A", "T" }, { "G", "C" } };

                    members.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, siteTable));
                }

                groups.Add(PhasedVariantTestUtilities.CreateVeadGroup(members));
            }

            return groups;
        }
コード例 #3
0
        public void AddMnvsFromClusters()
        {
            // TODO: even with a mocked cluster this still needs a lot of setup.
            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T");

            // Three reads (r1..r3) carrying the same three sites: C>A, G>A, T>A.
            var reads = new List<Vead>();
            foreach (var readName in new[] { "r1", "r2", "r3" })
            {
                reads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] {
                    { "C", "A" }, { "G", "A" }, { "T", "A" }
                }));
            }

            neighborhood.ReferenceSequence = "CGT";

            // The mocked cluster reports fixed per-site counts and exposes the single
            // consensus group built from the three reads.
            var clusterMock = new Mock<ICluster>();
            clusterMock.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });

            var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(reads);
            clusterMock.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
            clusterMock.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup>() { consensusGroup });

            neighborhood.AddMnvsFromClusters(new List<ICluster>() { clusterMock.Object }, 20, 100);

            // The first candidate is expected to be the CGT>AAA MNV.
            var candidate = neighborhood.CandidateVariants.First();

            Assert.Equal(6, candidate.TotalCoverage);
            Assert.Equal(6, candidate.AlleleSupport);
            Assert.Equal("CGT", candidate.Reference);
            Assert.Equal("AAA", candidate.Alternate);

            // Depth is expected to be 3 at each of the three sites.
            var depths = neighborhood.DepthAtSites(new List<ICluster>() { clusterMock.Object });

            Assert.Equal(3, depths.Length);
            for (var siteIndex = 0; siteIndex < 3; siteIndex++)
            {
                Assert.Equal(3, depths[siteIndex]);
            }
        }
コード例 #4
0
        public void SetDepthAtSites()
        {
            // NOTE(review): this test only constructs fixtures. It never calls a depth-related
            // method and contains no assertions, so it can only fail if construction throws.
            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T");

            // Three reads (r1..r3) carrying the same three sites: C>A, G>A, T>A.
            var reads = new List<Vead>();
            foreach (var readName in new[] { "r1", "r2", "r3" })
            {
                reads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] {
                    { "C", "A" }, { "G", "A" }, { "T", "A" }
                }));
            }
        }
コード例 #5
0
        /// <summary>
        /// Creates a mocked <c>IVeadGroupSource</c> whose <c>GetVeadGroups</c> returns the same
        /// three single-vead groups (reads "r1", "r2" and "r5") for any neighborhood.
        /// </summary>
        /// <returns>The configured mock (not its <c>Object</c>).</returns>
        private Mock<IVeadGroupSource> MockVeadSource()
        {
            var cannedGroups = new List<VeadGroup>();

            // Every read has the same six sites: two N/N entries followed by four C>A entries.
            foreach (var readName in new[] { "r1", "r2", "r5" })
            {
                var read = PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] {
                    { "N", "N" }, { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" }
                });
                cannedGroups.Add(new VeadGroup(read));
            }

            var sourceMock = new Mock<IVeadGroupSource>();
            sourceMock.Setup(s => s.GetVeadGroups(It.IsAny<VcfNeighborhood>())).Returns(cannedGroups);

            return sourceMock;
        }
コード例 #6
0
        /// <summary>
        /// Builds one identical chr1 neighborhood per expected thread. Each neighborhood has its
        /// <c>VcfVariantSites</c> replaced with a single site at position 123.
        /// </summary>
        /// <param name="expectedNumberOfThreads">How many neighborhoods to create.</param>
        /// <returns>The created neighborhoods; empty when the count is zero or negative.</returns>
        private List<VcfNeighborhood> GetNeighborhoods(int expectedNumberOfThreads)
        {
            var result = new List<VcfNeighborhood>();

            while (result.Count < expectedNumberOfThreads)
            {
                var nbhd = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T");

                // The only site kept in the list (originally at index 0).
                var siteAt123 = new VariantSite(123);
                siteAt123.ReferenceName = "chr1";
                siteAt123.OriginalAlleleFromVcf = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);

                nbhd.VcfVariantSites = new List<VariantSite> { siteAt123 };

                result.Add(nbhd);
            }

            return result;
        }
コード例 #7
0
        public void WriteHeader()
        {
            // WriteHeader should reproduce the original header and insert a line naming the
            // phaser. The assertions below pin that line to index 2 of the output.
            var vcfWriter = InitializeWriter(false);

            vcfWriter.WriteHeader();
            vcfWriter.Dispose();

            // After disposal every write attempt must throw ...
            Assert.Throws<Exception>(() => vcfWriter.WriteHeader());
            Assert.Throws<Exception>(() => vcfWriter.Write(new List<CalledAllele> {
                PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
            }));

            // ... while disposing a second time must not.
            vcfWriter.Dispose();

            Assert.True(File.Exists(_outputFile));
            var writtenLines = File.ReadAllLines(_outputFile);

            // The first two header lines are carried over unchanged.
            for (var lineIndex = 0; lineIndex < 2; lineIndex++)
            {
                Assert.Equal(_origHeader[lineIndex], writtenLines[lineIndex]);
            }

            // Index 2 now holds the phaser line; the original third line is found at index 4.
            var insertedLine = writtenLines[2];
            Assert.NotEqual(_origHeader[2], insertedLine);
            Assert.True(insertedLine.StartsWith("##VariantPhaser=Scylla"));
            Assert.Equal(_origHeader[2], writtenLines[4]);
        }
コード例 #8
0
        public void VarCallsBecomeRefsAndNulls()
        {
            var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var siteAt123 = new VariantSite(alleleAt123);
            var siteAt124 = new VariantSite(alleleAt124);

            var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
            neighborhood.SetRangeOfInterest();

            // Only the variant at 123 is accepted as phased; 124 is left to the ref caller.
            neighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
                {
                    Chromosome = "chr1",
                    Coordinate = 123,
                    Reference = "A",
                    Alternate = "T",
                    VariantQscore = 35,
                    AlleleSupport = 10,
                    TotalCoverage = 50
                });

            // The same call sequence is repeated for three "used ref count" settings at 124.
            // The site at 124 was created with 156 alt / 1000 total, i.e. 844 reference reads:
            //   scenario 0 - nothing used up        -> 0/0 with AD 844
            //   scenario 1 - 100 ref reads used up  -> 0/0 with AD 744 (still a ref call)
            //   scenario 2 - 1000 used up           -> ./. with AD 0   (becomes a null call)
            var usedRefCountsPerScenario = new[]
            {
                new Dictionary<int, int>(),
                new Dictionary<int, int> { { 124, 100 } },
                new Dictionary<int, int> { { 124, 1000 } }
            };
            var expectedGenotypePerScenario = new[] { "0/0", "0/0", "./." };
            var expectedRefSupportPerScenario = new[] { "844", "744", "0" };

            for (var scenario = 0; scenario < usedRefCountsPerScenario.Length; scenario++)
            {
                neighborhood.UsedRefCountsLookup = usedRefCountsPerScenario[scenario];

                variantCaller.CallMNVs(neighborhood);
                variantCaller.CallRefs(neighborhood);

                var calledVariants = neighborhood.CalledVariants;
                var calledRefs = neighborhood.CalledRefs;

                // Exactly one variant position (123) with one allele, and two ref entries.
                Assert.Equal(1, calledVariants.Count);
                Assert.Equal(1, calledVariants[123].Count);
                Assert.Equal(2, calledRefs.Count);

                var expectedAt124 = new VcfVariant()
                {
                    ReferenceName = "chr1",
                    ReferencePosition = 124,
                    ReferenceAllele = "A",
                    VariantAlleles = new[] { "." },
                    Genotypes = new List<Dictionary<string, string>>()
                    {
                        new Dictionary<string, string>()
                        {
                            { "GT", expectedGenotypePerScenario[scenario] },
                            { "DP", "1000" },
                            { "AD", expectedRefSupportPerScenario[scenario] }
                        }
                    },
                };

                VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
                VcfMergerTests.CheckVariantsMatch(expectedAt124, calledRefs[124]);
            }
        }
コード例 #9
0
        public void CallAVariantInANewLocation()
        {
            // Original variants: two adjacent sites (123, 124) and two alleles at 234.
            var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var firstAlleleAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
            var secondAlleleAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

            var siteAt123 = new VariantSite(alleleAt123);
            var siteAt124 = new VariantSite(alleleAt124);
            var firstSiteAt234 = new VariantSite(firstAlleleAt234);
            var secondSiteAt234 = new VariantSite(secondAlleleAt234);

            var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");

            // Note: the second site at 234 is deliberately NOT added; it is not going to be used
            // for phasing. Suppose it is a variant that failed filters.
            neighborhood.AddVariantSite(firstSiteAt234, "RRRRR");
            neighborhood.SetRangeOfInterest();

            // Stage one candidate MNV at a position (129) that none of the original sites occupy.
            var newMNV = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1",
                Coordinate = 129,
                Reference = "A",
                Alternate = "TT"
            };
            neighborhood.AddAcceptedPhasedVariant(newMNV);

            // All 1000 reads at 124 are marked as used.
            neighborhood.UsedRefCountsLookup = new Dictionary<int, int>() { { 124, 1000 } };

            // Expected records at the original sites: 123 and 234 as 0/0, 124 as ./.
            var expectedPositions = new[] { 123, 234, 124 };
            var expectedGenotypes = new[] { "0/0", "0/0", "./." };
            var expectedByPosition = new Dictionary<int, VcfVariant>();
            for (var k = 0; k < expectedPositions.Length; k++)
            {
                expectedByPosition[expectedPositions[k]] = new VcfVariant()
                {
                    ReferenceName = "chr1",
                    ReferencePosition = expectedPositions[k],
                    ReferenceAllele = "A",
                    VariantAlleles = new[] { "." },
                    Genotypes = new List<Dictionary<string, string>>()
                    {
                        new Dictionary<string, string>()
                        {
                            { "GT", expectedGenotypes[k] }
                        }
                    },
                };
            }

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs = neighborhood.CalledRefs;

            // One variant position (129) holding one allele, plus three ref/null entries.
            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[129].Count);

            Assert.Equal(3, calledRefs.Count);

            VcfMergerTests.CheckVariantsMatch(expectedByPosition[123], calledRefs[123]);
            VcfMergerTests.CheckVariantsMatch(expectedByPosition[124], calledRefs[124]);
            VcfMergerTests.CheckVariantsMatch(newMNV, calledVariants[129][0]);
            VcfMergerTests.CheckVariantsMatch(expectedByPosition[234], calledRefs[234]);
        }
コード例 #10
0
        public void AddMnvsFromClusters()
        {
            // TODO: even with a mocked cluster this still needs a lot of setup.
            var neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121));

            // Three reads (r1..r3) carrying the same three sites: C>A, G>A, T>A.
            var reads = new List<Vead>();
            foreach (var readName in new[] { "r1", "r2", "r3" })
            {
                reads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] {
                    { "C", "A" }, { "G", "A" }, { "T", "A" }
                }));
            }

            // Place the three sites of every read at reference positions 1, 2 and 3.
            foreach (var read in reads)
            {
                for (var siteIndex = 0; siteIndex < 3; siteIndex++)
                {
                    read.SiteResults[siteIndex].VcfReferencePosition = siteIndex + 1;
                }
            }

            // The mocked cluster reports fixed per-site counts and exposes the single
            // consensus group built from the three reads.
            var clusterMock = new Mock<ICluster>();
            clusterMock.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });

            var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(reads);
            clusterMock.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
            clusterMock.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup>() { consensusGroup });

            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters());
            callable.NbhdReferenceSequenceSubstring = "CGT";
            callable.CreateMnvsFromClusters(new List<ICluster>() { clusterMock.Object }, 20);

            // The first candidate is expected to be the CGT>AAA MNV.
            var candidate = callable.CandidateVariants.First();

            Assert.Equal(6, candidate.TotalCoverage);
            Assert.Equal(6, candidate.AlleleSupport);
            Assert.Equal("CGT", candidate.ReferenceAllele);
            Assert.Equal("AAA", candidate.AlternateAllele);

            // Both arrays are produced by DepthAtSites through its out parameters;
            // only the depths are checked here.
            int[] depths;
            int[] noCalls;
            callable.DepthAtSites(new List<ICluster>() { clusterMock.Object }, out depths, out noCalls);

            Assert.Equal(3, depths.Length);
            for (var siteIndex = 0; siteIndex < 3; siteIndex++)
            {
                Assert.Equal(3, depths[siteIndex]);
            }
        }
コード例 #11
0
        public void WriteANbhd()
        {
            // Merges MergerInput.vcf with two hand-built neighborhoods and compares the written
            // file line by line against MergerOutput.vcf.
            var testDataDirectory = UnitTestPaths.TestDataDirectory;
            var outputFilePath = Path.Combine(testDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
            var inputFilePath = Path.Combine(testDataDirectory, "MergerInput.vcf");
            var expectedFilePath = Path.Combine(testDataDirectory, "MergerOutput.vcf");

            // Start from a clean slate so a stale output file cannot satisfy the comparison.
            File.Delete(outputFilePath);

            // NOTE(review): this context is populated but never handed to the writer below,
            // which receives a fresh, empty VcfWriterInputContext instead. Confirm intent.
            var context = new VcfWriterInputContext
            {
                CommandLine = new[] { "myCommandLine" },
                SampleName = "mySample",
                ReferenceName = "myReference",
                ContigsByChr = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var writerConfig = new VcfWriterConfig
            {
                DepthFilterThreshold = 500,
                VariantQualityFilterThreshold = 30,
                FrequencyFilterThreshold = 0.007f,
                ShouldOutputNoCallFraction = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality = 23,
                PloidyModel = PloidyModel.Somatic,
                AllowMultipleVcfLinesPerLoci = true
            };

            var vcfWriter = new PhasedVcfWriter(outputFilePath, writerConfig, new VcfWriterInputContext(), new List<string>(), null);
            var vcfReader = new VcfReader(inputFilePath, true);

            // Set up the original variants: two alleles sharing a chr2 position, two on chr7.
            var chr2Snv = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
            var chr2Mnv = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
            var chr7At51 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
            var chr7At52 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

            var chr2SnvSite = new VariantSite(chr2Snv);
            var chr2MnvSite = new VariantSite(chr2Mnv);
            var chr7SiteAt51 = new VariantSite(chr7At51);
            var chr7SiteAt52 = new VariantSite(chr7At52);

            // Replaces the variants at position 116380048 (two new MNVs are called here).
            var chr2Nbhd = new VcfNeighborhood(new VariantCallingParameters(), "chr2", chr2SnvSite, chr2MnvSite, "");
            chr2Nbhd.SetRangeOfInterest();

            // Replaces the variants at positions 116380051 and 52 (one new MNV is called at 51).
            var chr7Nbhd = new VcfNeighborhood(new VariantCallingParameters(), "chr7", chr7SiteAt51, chr7SiteAt52, "");
            chr7Nbhd.SetRangeOfInterest();

            var merger = new VcfMerger(vcfReader);
            var carriedOver = new List<CalledAllele>();

            chr2Nbhd.CalledVariants = new Dictionary<int, List<CalledAllele>>
            {
                { chr2Snv.Coordinate, new List<CalledAllele> { chr2Snv, chr2Mnv } }
            };
            chr7Nbhd.CalledVariants = new Dictionary<int, List<CalledAllele>>
            {
                { chr7At51.Coordinate, new List<CalledAllele> { chr7At51 } }
            };

            // Walk the input in order: up to each neighborhood's chromosome, through the
            // neighborhood itself, and finally whatever remains. Each step returns the alleles
            // that must be carried into the next one.
            carriedOver = merger.WriteVariantsUptoChr(vcfWriter, carriedOver, chr2Nbhd.ReferenceName);
            carriedOver = merger.WriteVariantsUptoIncludingNbhd(chr2Nbhd, vcfWriter, carriedOver);
            carriedOver = merger.WriteVariantsUptoChr(vcfWriter, carriedOver, chr7Nbhd.ReferenceName);
            carriedOver = merger.WriteVariantsUptoIncludingNbhd(chr7Nbhd, vcfWriter, carriedOver);
            merger.WriteRemainingVariants(vcfWriter, carriedOver);

            vcfWriter.Dispose();

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var actualLines = File.ReadLines(outputFilePath).ToList();

            Assert.Equal(expectedLines.Count, actualLines.Count);

            for (var lineIndex = 0; lineIndex < expectedLines.Count; lineIndex++)
            {
                Assert.Equal(expectedLines[lineIndex], actualLines[lineIndex]);
            }
        }
コード例 #12
0
        public void Write()
        {
            // Every expected record ends with the same QUAL/FILTER/INFO/FORMAT/sample columns;
            // only the leading CHROM/POS/ID/REF/ALT part differs from line to line.
            const string sharedColumns = "\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000";

            // Deliberately unsorted input: the writer should order by chrom, coord, ref, then alt.
            var unsortedVariants = new List<CalledAllele>
            {
                PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "A", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr8", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 124, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chrM", 123, "A", "C", 1000, 156),
            };

            // ---- Part 1: write a normal (uncrushed) vcf ----
            var vcfWriter = InitializeWriter(false);

            // Expected order of the eight records.
            var expectedUncrushed = new List<string>();
            foreach (var leadingColumns in new[]
            {
                "chrM\t123\t.\tA\tC",
                "chr8\t123\t.\tA\tC",
                "chr9\t123\t.\tA\tC",
                "chr9\t123\t.\tT\tA",
                "chr9\t123\t.\tT\tC",
                "chr9\t124\t.\tA\tC",
                "chr10\t124\t.\tA\tC",
                "chrX\t123\t.\tA\tC"
            })
            {
                expectedUncrushed.Add(leadingColumns + sharedColumns);
            }

            vcfWriter.Write(unsortedVariants);
            vcfWriter.Dispose();

            // A disposed writer must reject further writes, but tolerate a second Dispose.
            Assert.Throws<Exception>(() => vcfWriter.WriteHeader());
            Assert.Throws<Exception>(() => vcfWriter.Write(new List<CalledAllele> {
                PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
            }));
            vcfWriter.Dispose();

            var writtenLines = File.ReadAllLines(_outputFile);

            Assert.Equal(unsortedVariants.Count, writtenLines.Length);

            for (var lineIndex = 0; lineIndex < expectedUncrushed.Count; lineIndex++)
            {
                Assert.Equal(expectedUncrushed[lineIndex], writtenLines[lineIndex]);
            }

            // ---- Part 2: write a crushed vcf ----
            vcfWriter = InitializeWriter(true);
            vcfWriter.Write(unsortedVariants);
            vcfWriter.Dispose();
            writtenLines = File.ReadAllLines(_outputFile);

            // The three chr9:123 records collapse into a single line.
            var expectedCrushed = new List<string>();
            foreach (var leadingColumns in new[]
            {
                "chrM\t123\t.\tA\tC",
                "chr8\t123\t.\tA\tC",
                "chr9\t123\t.\tA\tC,A,C",
                "chr9\t124\t.\tA\tC",
                "chr10\t124\t.\tA\tC",
                "chrX\t123\t.\tA\tC"
            })
            {
                expectedCrushed.Add(leadingColumns + sharedColumns);
            }

            Assert.Equal(6, writtenLines.Length); // only variants at different positions
            for (var lineIndex = 0; lineIndex < expectedCrushed.Count; lineIndex++)
            {
                Assert.Equal(expectedCrushed[lineIndex], writtenLines[lineIndex]);
            }
        }
コード例 #13
0
        public void OrderVariants()
        {
            // Fixtures: two positions (123, 124) on chr9, chrX and chrM, one position on chr10,
            // and two non-standard chromosome names (chrZ, chrA).
            var nine = PhasedVariantTestUtilities.CreateDummyVariant("chr9", 123, "A", "C", 1000, 156);
            var nineNext = PhasedVariantTestUtilities.CreateDummyVariant("chr9", 124, "A", "C", 1000, 156);
            var ten = PhasedVariantTestUtilities.CreateDummyVariant("chr10", 123, "A", "C", 1000, 156);
            var x = PhasedVariantTestUtilities.CreateDummyVariant("chrX", 123, "A", "C", 1000, 156);
            var xNext = PhasedVariantTestUtilities.CreateDummyVariant("chrX", 124, "A", "C", 1000, 156);
            var m = PhasedVariantTestUtilities.CreateDummyVariant("chrM", 123, "A", "C", 1000, 156);
            var mNext = PhasedVariantTestUtilities.CreateDummyVariant("chrM", 124, "A", "C", 1000, 156);
            var weirdZ = PhasedVariantTestUtilities.CreateDummyVariant("chrZ", 123, "A", "C", 1000, 156);
            var funnyA = PhasedVariantTestUtilities.CreateDummyVariant("chrA", 123, "A", "C", 1000, 156);

            // ---------------------------------------------------------------------------
            // When neither or both variants are on chrM, the chrM-priority option must not
            // change the result, so each expectation is checked with the flag on and off.
            // ---------------------------------------------------------------------------
            foreach (var prioritizeChrM in new[] { true, false })
            {
                // Same chrom, different positions - numeric chrom
                Assert.Equal(-1, Extensions.OrderVariants(nine, nineNext, prioritizeChrM));
                Assert.Equal(1, Extensions.OrderVariants(nineNext, nine, prioritizeChrM));

                // Same chrom, different positions - chrX
                Assert.Equal(-1, Extensions.OrderVariants(x, xNext, prioritizeChrM));
                Assert.Equal(1, Extensions.OrderVariants(xNext, x, prioritizeChrM));

                // Same chrom, different positions - chrM
                Assert.Equal(-1, Extensions.OrderVariants(m, mNext, prioritizeChrM));
                Assert.Equal(1, Extensions.OrderVariants(mNext, m, prioritizeChrM));

                // Different chroms, one is >= 10 (a plain string compare would mis-sort these)
                Assert.Equal(-1, Extensions.OrderVariants(nine, ten, prioritizeChrM));

                // One numeric, one chrX
                Assert.Equal(-1, Extensions.OrderVariants(nine, x, prioritizeChrM));
            }

            // Same chrom, same position
            Assert.Equal(0, Extensions.OrderVariants(nine, nine, true));
            Assert.Equal(0, Extensions.OrderVariants(x, x, true));
            Assert.Equal(0, Extensions.OrderVariants(m, m, true));

            // ---------------------------------------------------------------------------
            // If exactly one variant is on chrM, the option to prioritize chrM matters.
            // ---------------------------------------------------------------------------

            // One numeric, one chrM
            Assert.Equal(1, Extensions.OrderVariants(nine, m, true));
            Assert.Equal(-1, Extensions.OrderVariants(nine, m, false));

            // One chrX, one chrM
            Assert.Equal(1, Extensions.OrderVariants(x, m, true));
            Assert.Equal(-1, Extensions.OrderVariants(x, m, false));

            // ---------------------------------------------------------------------------
            // Nonstandard chroms should sort below numerics and then alphabetically.
            // ---------------------------------------------------------------------------
            foreach (var prioritizeChrM in new[] { true, false })
            {
                // One numeric, one weird
                Assert.Equal(-1, Extensions.OrderVariants(nine, weirdZ, prioritizeChrM));

                // One chrX, one weird
                Assert.Equal(-1, Extensions.OrderVariants(x, weirdZ, prioritizeChrM));

                // "One chrM, one weird"
                // NOTE(review): this case passes the chrX variant, not chrM, so it duplicates the
                // check above and chrM vs. a nonstandard chrom is never exercised. Kept as-is.
                Assert.Equal(-1, Extensions.OrderVariants(x, weirdZ, prioritizeChrM));

                // One numeric, one funny
                Assert.Equal(-1, Extensions.OrderVariants(nine, funnyA, prioritizeChrM));

                // One chrX, one funny
                Assert.Equal(1, Extensions.OrderVariants(x, funnyA, prioritizeChrM));

                // "One chrM, one funny"
                // NOTE(review): same copy/paste issue - chrX is used where chrM was intended.
                Assert.Equal(1, Extensions.OrderVariants(x, funnyA, prioritizeChrM));
            }
        }
コード例 #14
0
        // Converts one dummy chr10 A>C variant several times through Extensions.ConvertUnpackedVariant,
        // rewriting its GT and FILTER fields between rounds, and checks the genotype, the filter list
        // and the allele category reported on each converted allele.
        public void Convert()
        {
            var unpacked = PhasedVariantTestUtilities.CreateDummyVariant("chr10", 123, "A", "C", 1000, 156);

            // Round 1: GT 0/1, before this test assigns any FILTER value.
            unpacked.Genotypes[0]["GT"] = "0/1";
            var converted = Extensions.ConvertUnpackedVariant(unpacked);
            var noFilters = new List<FilterType>();

            Assert.Equal(unpacked.ReferenceName, converted.Chromosome);
            Assert.Equal(unpacked.VariantAlleles[0], converted.Alternate);
            Assert.Equal(unpacked.ReferenceAllele, converted.Reference);
            Assert.Equal(unpacked.ReferencePosition, converted.Coordinate);
            Assert.Equal(noFilters, converted.Filters);
            Assert.Equal(Genotype.HeterozygousAltRef, converted.Genotype);
            Assert.Equal(AlleleCategory.Snv, converted.Type);

            // Round 2: GT ./. with the R5x9 filter string.
            unpacked.Genotypes[0]["GT"] = "./.";
            unpacked.Filters = "R5x9";
            converted = Extensions.ConvertUnpackedVariant(unpacked);
            var repeatFilterOnly = new List<FilterType> { FilterType.RMxN };

            Assert.Equal(unpacked.ReferenceName, converted.Chromosome);
            Assert.Equal(unpacked.VariantAlleles[0], converted.Alternate);
            Assert.Equal(unpacked.ReferenceAllele, converted.Reference);
            Assert.Equal(unpacked.ReferencePosition, converted.Coordinate);
            Assert.Equal(repeatFilterOnly, converted.Filters);
            Assert.Equal(Genotype.AltLikeNoCall, converted.Genotype);
            Assert.Equal(AlleleCategory.Snv, converted.Type);

            // Round 3: GT 1/2 with two filters separated by ';'.
            unpacked.Genotypes[0]["GT"] = "1/2";
            unpacked.Filters = "R5x9;SB";
            converted = Extensions.ConvertUnpackedVariant(unpacked);
            var repeatAndStrandBias = new List<FilterType> { FilterType.RMxN, FilterType.StrandBias };

            Assert.Equal(unpacked.ReferenceName, converted.Chromosome);
            Assert.Equal(unpacked.VariantAlleles[0], converted.Alternate);
            Assert.Equal(unpacked.ReferenceAllele, converted.Reference);
            Assert.Equal(unpacked.ReferencePosition, converted.Coordinate);
            Assert.Equal(repeatAndStrandBias, converted.Filters);
            Assert.Equal(Genotype.HeterozygousAlt1Alt2, converted.Genotype);
            Assert.Equal(AlleleCategory.Snv, converted.Type);

            // Round 4: GT 1/1 with the R8 and q30 filter strings.
            unpacked.Genotypes[0]["GT"] = "1/1";
            unpacked.Filters = "R8;q30";
            converted = Extensions.ConvertUnpackedVariant(unpacked);
            var indelRepeatAndLowQ = new List<FilterType> { FilterType.IndelRepeatLength, FilterType.LowVariantQscore };

            Assert.Equal(unpacked.ReferenceName, converted.Chromosome);
            Assert.Equal(indelRepeatAndLowQ, converted.Filters);
            Assert.Equal(Genotype.HomozygousAlt, converted.Genotype);
            Assert.Equal(AlleleCategory.Snv, converted.Type);

            // Round 5: GT 1/1 again, with filter names written entirely in lower case.
            unpacked.Genotypes[0]["GT"] = "1/1";
            unpacked.Filters = "lowvariantfreq;multiallelicsite";
            converted = Extensions.ConvertUnpackedVariant(unpacked);
            var lowFreqAndMultiAllelic = new List<FilterType> { FilterType.LowVariantFrequency, FilterType.MultiAllelicSite };

            Assert.Equal(unpacked.ReferenceName, converted.Chromosome);
            Assert.Equal(lowFreqAndMultiAllelic, converted.Filters);
            Assert.Equal(Genotype.HomozygousAlt, converted.Genotype);
            Assert.Equal(AlleleCategory.Snv, converted.Type);
        }
コード例 #15
0
        // Drives ExecuteClusteringTest through three scenarios:
        //   1. one group made of four identical reads,
        //   2. six single-read groups taken from Sample 129,
        //   3. four groups covering ten reads, clustered first without a ploidy constraint and
        //      then with ploidyConstraint 3, 2 and 1.
        // Each call passes the input groups, the expected clusters, the expected strings and the
        // trailing integer arguments exactly as ExecuteClusteringTest declares them.
        public void ClusterVeadGroups()
        {
            // ----------------------------------------------------
            // Four Ns
            //  - This is from original "FourNs Test"
            // ----------------------------------------------------

            var veads = new List<Vead>()
            {
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new string[2, 2] { { "C", "C" }, { "G", "N" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new string[2, 2] { { "C", "C" }, { "G", "N" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new string[2, 2] { { "C", "C" }, { "G", "N" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r4", new string[2, 2] { { "C", "C" }, { "G", "N" } }),
            };

            var veadgroup = PhasedVariantTestUtilities.CreateVeadGroup(veads);

            ExecuteClusteringTest(
                new List<VeadGroup>() { veadgroup },
                new List<List<VeadGroup>>
                {
                    new List<VeadGroup> { veadgroup }
                },
                new List<string>() { "C>C,G>N" },
                1);


            // ----------------------------------------------------
            // Real Data
            //  - This data is from Sample 129 (original "Sample129Test")
            // ----------------------------------------------------

            // Every read r1..r6 gets its own single-read group. (The original also rebuilt the
            // `veads` list with these six reads, but nothing ever read it, so it was dropped.)
            var group1 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new string[2, 2] { { "A", "G" }, { "N", "N" } }));
            var group4 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r4", new string[2, 2] { { "A", "G" }, { "C", "A" } }));
            var group6 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r6", new string[2, 2] { { "N", "N" }, { "C", "A" } }));

            var group2 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new string[2, 2] { { "A", "G" }, { "C", "C" } }));

            var group3 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new string[2, 2] { { "A", "A" }, { "C", "C" } }));
            var group5 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r5", new string[2, 2] { { "N", "N" }, { "C", "C" } }));

            ExecuteClusteringTest(
                new List<VeadGroup>() { group1, group2, group3, group4, group5, group6 },
                new List<List<VeadGroup>>
                {
                    new List<VeadGroup> { group4, group6, group1 },
                    new List<VeadGroup> { group3, group5 },
                    new List<VeadGroup> { group2 },
                },
                new List<string>() { "A>G,C>A", "A>G,C>C", "A>A,C>C" },
                1, 0);

            // ----------------------------------------------------
            // Ten grouped reads
            //  - This is from original "10 ReadsTest"
            // ----------------------------------------------------

            // group1: reads r1, r2, r5
            group1 = PhasedVariantTestUtilities.CreateVeadGroup(new List<Vead>
            {
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new string[6, 2] { { "N", "N" }, { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new string[6, 2] { { "N", "N" }, { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r5", new string[6, 2] { { "N", "N" }, { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" } }),
            });

            // group2: reads r3, r4, r7, r8, r9
            group2 = PhasedVariantTestUtilities.CreateVeadGroup(new List<Vead>
            {
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new string[6, 2] { { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r4", new string[6, 2] { { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r7", new string[6, 2] { { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r8", new string[6, 2] { { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }),
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r9", new string[6, 2] { { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }),
            });

            // group3: read r10
            group3 = PhasedVariantTestUtilities.CreateVeadGroup(new List<Vead>
            {
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r10", new string[6, 2] { { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }),
            });

            // group4: read r6
            group4 = PhasedVariantTestUtilities.CreateVeadGroup(new List<Vead>
            {
                PhasedVariantTestUtilities.CreateVeadFromStringArray("r6", new string[6, 2] { { "C", "C" }, { "C", "C" }, { "C", "C" }, { "C", "C" }, { "C", "C" }, { "C", "C" } }),
            });

            // No ploidy constraint supplied: three clusters are expected.
            ExecuteClusteringTest(
                new List<VeadGroup>() { group1, group2, group3, group4 },
                new List<List<VeadGroup>>
                {
                    new List<VeadGroup> { group1 },
                    new List<VeadGroup> { group2, group3 },
                    new List<VeadGroup> { group4 },
                },
                new List<string>()
                {
                    "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C"
                },
                4, 0);


            // ploidyConstraint 3: the same three clusters are expected.
            ExecuteClusteringTest(
                new List<VeadGroup>() { group1, group2, group3, group4 },
                new List<List<VeadGroup>>
                {
                    new List<VeadGroup> { group1 },
                    new List<VeadGroup> { group2, group3 },
                    new List<VeadGroup> { group4 },
                },
                new List<string>()
                {
                    "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C"
                },
                4, 0,
                ploidyConstraint: 3);


            // ploidyConstraint 2: the single-read {group4} cluster is the loser and is no longer
            // expected; the expected strings are still passed unchanged.
            // NOTE(review): the original annotations called {group1} "6 reads" and
            // {group2, group3} "3 reads", yet group1 is built from 3 veads and group2 + group3
            // from 6 -- confirm which counts were intended.
            ExecuteClusteringTest(
                new List<VeadGroup>() { group1, group2, group3, group4 },
                new List<List<VeadGroup>>
                {
                    new List<VeadGroup> { group1 },
                    new List<VeadGroup> { group2, group3 },
                },
                new List<string>()
                {
                    "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C"
                },
                4, 0,
                ploidyConstraint: 2);

            // ploidyConstraint 1: only the {group1} cluster is expected (the original comment
            // labelled it "the winner").
            ExecuteClusteringTest(
                new List<VeadGroup>() { group1, group2, group3, group4 },
                new List<List<VeadGroup>>
                {
                    new List<VeadGroup> { group1 },
                },
                new List<string>()
                {
                    "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C"
                },
                4, 0,
                ploidyConstraint: 1);
        }
コード例 #16
0
        // Stages four original chr1 alleles (two of them at position 234), a mocked neighborhood
        // that reports three of them as used plus one called MNV at 229 and three called refs,
        // then checks the content and order of the list VcfMerger.GetMergedListOfVariants returns.
        public void GetAcceptedVariants_MergeVariants()
        {
            var snvAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var snvAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var snvAt234ToT = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
            var snvAt234ToC = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "C", 1000, 156);

            var calledMnv = new CalledAllele()
            {
                Chromosome = "chr1",
                Coordinate = 229,
                Reference  = "AA",
                Alternate  = "T",
                Genotype   = Genotype.HeterozygousAltRef
            };

            // Everything handed to the merger, versus the subset the neighborhood says it consumed.
            var allInputAlleles = new List<CalledAllele> { snvAt123, snvAt124, snvAt234ToT, snvAt234ToC };
            var consumedByCaller = new List<CalledAllele>() { snvAt123, snvAt124, snvAt234ToT };

            var calledVariantsByPosition = new Dictionary<int, List<CalledAllele>>();
            calledVariantsByPosition.Add(calledMnv.Coordinate, new List<CalledAllele>() { calledMnv });

            // If a site has been sucked up all the way, it should be output as a no-call. It has
            // to be staged as a no-call already, because the merger can't do that conversion.
            var calledRefsByPosition = new Dictionary<int, CalledAllele>();
            calledRefsByPosition.Add(123, new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 123, Chromosome = "chr1", Reference = "A", Alternate = "."
            });
            calledRefsByPosition.Add(124, new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 124, Chromosome = "chr1", Reference = "A", Alternate = ".", Genotype = Genotype.RefLikeNoCall
            });
            calledRefsByPosition.Add(234, new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 234, Chromosome = "chr1", Reference = "A", Alternate = ".", Genotype = Genotype.HomozygousRef
            });

            var neighborhood = new Mock<IVcfNeighborhood>();
            neighborhood.Setup(n => n.GetOriginalVcfVariants()).Returns(new List<CalledAllele>(consumedByCaller));
            neighborhood.Setup(n => n.CalledVariants).Returns(calledVariantsByPosition);
            neighborhood.Setup(n => n.CalledRefs).Returns(calledRefsByPosition);

            var merged = VcfMerger.GetMergedListOfVariants(neighborhood.Object, new List<CalledAllele>(allInputAlleles));

            Assert.Equal(5, merged.Count);

            // Expected records for the three positions that are reported as reference / no-call.
            var expectedRefAt123 = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>()
                {
                    new Dictionary<string, string>() { { "GT", "0/0" } }
                },
            };

            var expectedNoCallAt124 = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>()
                {
                    new Dictionary<string, string>() { { "GT", "./." } }
                },
            };

            var expectedRefAt234 = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 234,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>()
                {
                    new Dictionary<string, string>() { { "GT", "0/0" } }
                },
            };

            // Expected order: 123 ref, 124 no-call, the MNV at 229, 234 ref, then the untouched A>C at 234.
            CheckVariantsMatch(expectedRefAt123, merged[0]);
            CheckVariantsMatch(expectedNoCallAt124, merged[1]);
            CheckVariantsMatch(calledMnv, merged[2]);
            CheckVariantsMatch(expectedRefAt234, merged[3]);
            CheckVariantsMatch(snvAt234ToC, merged[4]);
        }
コード例 #17
0
        // Writes two dummy variants through PhasedVcfWriter twice, each time with a different
        // VcfWriterConfig and original header, reads the file back with VcfReader and compares the
        // header lines against the expected list. The "##VariantPhaser=" line is only compared by
        // prefix. The writer is given a default VcfWriterInputContext, as in the original test.
        public void FilterHeader()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileWriterTests.vcf");

            File.Delete(outputFilePath);

            // ---- Round 1: diploid model, quality threshold 30, original header already has FILTER lines ----
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Diploid,
            };

            //note, scylla has no SB or RMxN or R8 filters.

            var variants = new List<CalledAllele>
            {
                PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
            };

            variants[0].Filters.AddRange(new List<FilterType> {
                FilterType.RMxN, FilterType.LowDepth, FilterType.LowVariantFrequency
            });
            variants[1].Filters.AddRange(new List<FilterType> {
                FilterType.IndelRepeatLength, FilterType.LowVariantQscore, FilterType.StrandBias
            });

            var originalHeader = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            var writtenHeader = WriteVariantsAndReadHeader(outputFilePath, config, originalHeader, variants);

            // Expected: the original lines, a VariantPhaser line after the Pisces command line, and
            // four extra FILTER lines appended after the FILTER lines already in the original header.
            var expectedHeader1 = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FILTER=<ID=q30,Description=\"Quality score less than 30, by Scylla\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
                "##FILTER=<ID=MultiAllelicSite,Description=\"Variant does not conform to diploid model\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            AssertHeaderLinesMatch(expectedHeader1, writtenHeader);

            // ---- Round 2: somatic model, quality threshold 22, original header without FILTER lines ----
            config = new VcfWriterConfig
            {
                DepthFilterThreshold          = 500,
                VariantQualityFilterThreshold = 22,
                FrequencyFilterThreshold      = 0.007f,
                EstimatedBaseCallQuality      = 23,
                PloidyModel = PloidyModel.Somatic,
            };

            originalHeader = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            // Expected: here the three new FILTER lines land after the FORMAT lines, just before #CHROM.
            var expectedHeader2 = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "##FILTER=<ID=q22,Description=\"Quality score less than 22\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam",
            };

            // The second write uses variants that carry no filters.
            variants[0].Filters = new List<FilterType>();
            variants[1].Filters = new List<FilterType>();

            writtenHeader = WriteVariantsAndReadHeader(outputFilePath, config, originalHeader, variants);

            AssertHeaderLinesMatch(expectedHeader2, writtenHeader);
        }

        // Writes the header and the variants to outputFilePath with a PhasedVcfWriter, then reopens
        // the file with a VcfReader and returns its header lines. Both objects are disposed in
        // finally blocks so the file is released even when writing or reading throws.
        private static List<string> WriteVariantsAndReadHeader(string outputFilePath, VcfWriterConfig config,
                                                                List<string> originalHeader, List<CalledAllele> variants)
        {
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), originalHeader, null);
            try
            {
                writer.WriteHeader();
                writer.Write(variants);
            }
            finally
            {
                writer.Dispose();
            }

            var reader = new VcfReader(outputFilePath);
            try
            {
                return reader.HeaderLines;
            }
            finally
            {
                reader.Dispose();
            }
        }

        // Asserts that both headers have the same number of lines and that every line matches
        // exactly, except the "##VariantPhaser=" line, where only the prefix is compared.
        private static void AssertHeaderLinesMatch(List<string> expectedHeader, List<string> writtenHeader)
        {
            const string phaserPrefix = "##VariantPhaser=";

            Assert.Equal(expectedHeader.Count, writtenHeader.Count);
            for (int i = 0; i < expectedHeader.Count; i++)
            {
                if (expectedHeader[i].StartsWith(phaserPrefix, StringComparison.Ordinal))
                {
                    Assert.True(writtenHeader[i].StartsWith(phaserPrefix, StringComparison.Ordinal));
                    continue;
                }

                Assert.Equal(expectedHeader[i], writtenHeader[i]);
            }
        }
コード例 #18
0
        public void GetAcceptedVariants_MergeNull()
        {
            var originalVcfVariant  = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var originalVcfVariant2 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var originalVcfVariant3 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
            var stagedVcfVariants   = new List <CalledAllele> {
                originalVcfVariant, originalVcfVariant2, originalVcfVariant3
            };

            var variantsUsedByCaller = new List <CalledAllele>()
            {
                originalVcfVariant, originalVcfVariant2
            };

            var stagedCalledMNV = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1", Coordinate = 123, Reference = "A", Alternate = "T"
            };

            var stagedCalledMNVs = new Dictionary <int, List <CalledAllele> >()
            {
                { stagedCalledMNV.Coordinate, new List <CalledAllele>()
                  {
                      stagedCalledMNV
                  } }
            };

            var stagedCalledRefs = new Dictionary <int, CalledAllele>()
            {
                { 123, new CalledAllele(AlleleCategory.Reference)
                  {
                      Coordinate = 123, Chromosome = "chr1", Reference = "A", Alternate = "."
                  } },
                { 124, new CalledAllele(AlleleCategory.Reference)
                  {
                      Coordinate = 124, Chromosome = "chr1", Reference = "A", Alternate = "."
                  } }
            };


            //since there is an alt at position 124 ( a call of 156 alt / 1000 total, that means 844 original ref calls.
            //Of which we said, 100 will get sucked up. So that leaves 744 / 1000 calls for a reference.
            //So, we can still make a confident ref call.

            var mockNeighborhood = new Mock <IVcfNeighborhood>();

            mockNeighborhood.Setup(n => n.GetOriginalVcfVariants()).Returns(variantsUsedByCaller.ToList());
            mockNeighborhood.Setup(n => n.CalledVariants).Returns(stagedCalledMNVs);
            mockNeighborhood.Setup(n => n.CalledRefs).Returns(stagedCalledRefs);


            var accepted = VcfMerger.GetMergedListOfVariants(mockNeighborhood.Object, stagedVcfVariants.ToList());

            Assert.Equal(3, accepted.Count);

            var vcfVariant2asNull = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "0/0" }, { "DP", "1000" }, { "AD", "744" }
                    }
                },
            };



            CheckVariantsMatch(originalVcfVariant, accepted[0]);
            CheckVariantsMatch(vcfVariant2asNull, accepted[1]);
            CheckVariantsMatch(originalVcfVariant3, accepted[2]);

            //re-stage the MNVs
            var stagedCalledMNVs2 = new Dictionary <int, List <CalledAllele> >()
            {
                { stagedCalledMNV.Coordinate, new List <CalledAllele>()
                  {
                      stagedCalledMNV
                  } }
            };

            mockNeighborhood.Setup(n => n.CalledVariants).Returns(stagedCalledMNVs2);

            // If one has been sucked up all the way, we should output it as a no-call
            // (but we have to stage it as a no-call already, because the merger can't do the conversion).
            var stagedCalledRefs2 = new Dictionary <int, CalledAllele>()
            {
                { 123, new CalledAllele(AlleleCategory.Reference)
                  {
                      Coordinate = 123, Chromosome = "chr1", Reference = "A", Alternate = "."
                  } },
                { 124, new CalledAllele(AlleleCategory.Reference)
                  {
                      Coordinate = 124, Chromosome = "chr1", Reference = "A", Alternate = ".", Genotype = Genotype.RefLikeNoCall
                  } }
            };

            mockNeighborhood.Setup(n => n.CalledRefs).Returns(stagedCalledRefs2);

            accepted = VcfMerger.GetMergedListOfVariants(mockNeighborhood.Object, stagedVcfVariants);


            Assert.Equal(3, accepted.Count);

            vcfVariant2asNull = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "./." }
                    }
                },
            };

            CheckVariantsMatch(originalVcfVariant, accepted[0]);
            CheckVariantsMatch(vcfVariant2asNull, accepted[1]);
            CheckVariantsMatch(originalVcfVariant3, accepted[2]);
        }