Exemple #1
0
        public void CheckPrecedingAndTrailingBasesGetRemoved()
        {
            // Only the first site carries a real ref/alt pair. The assertions pin the
            // alleles after the shared leading "TC" and trailing "ACGT" are gone, and
            // the reported position moved two bases to the right.
            const int firstSitePosition = 28608285;

            // -1 is passed explicitly as the anchor position.
            int anchorPosition = -1;

            var sites = new VariantSite[]
            {
                new VariantSite(firstSitePosition)
                {
                    VcfReferenceAllele = "TCTCAAAAAACGT",
                    VcfAlternateAllele = "TCGTACGT"
                },
                new VariantSite(28608287)
            };

            var depthAtSites   = new int[] { 100, 200 };
            var noCallsAtSites = new int[] { 0, 0 };
            var countsAtSites  = new int[] { 90, 190 };

            var called = new Pisces.Domain.Models.Alleles.CalledAllele();

            var removedRefs = PhasedVariantExtractor.Extract(
                out called,
                sites,
                referenceSequence,
                depthAtSites,
                noCallsAtSites,
                countsAtSites,
                chromosome,
                20,
                100,
                anchorPosition);

            Assert.Equal("TCAAAAA", called.ReferenceAllele);
            Assert.Equal("GT", called.AlternateAllele);
            Assert.Equal(firstSitePosition + 2, called.ReferencePosition);
        }
Exemple #2
0
        public void CheckOverlappingMNVs()
        {
            // Three variant sites: an MNV at ...285, plus an MNV and an SNV that are both
            // placed at ...287. The expected output is a single AGGGA -> CCTTT call at ...285
            // with no reference positions reported for removal.
            var sites = new VariantSite[]
            {
                new VariantSite(28608285) { VcfReferenceAllele = "AGG", VcfAlternateAllele = "CCT" }, // 5,6,7
                new VariantSite(28608287) { VcfReferenceAllele = "GGA", VcfAlternateAllele = "TTT" }, // 7,8,9
                new VariantSite(28608287) { VcfReferenceAllele = "A", VcfAlternateAllele = "T" }
            };

            var depthAtSites   = new int[] { 100, 200, 200 };
            var noCallsAtSites = new int[] { 0, 0, 0 };
            var countsAtSites  = new int[] { 90, 190, 190 };

            var called = new Pisces.Domain.Models.Alleles.CalledAllele();

            var removedRefs = PhasedVariantExtractor.Extract(
                out called,
                sites,
                referenceSequence,
                depthAtSites,
                noCallsAtSites,
                countsAtSites,
                chromosome,
                20,
                100);

            Assert.Equal(0, removedRefs.Count);
            Assert.Equal("AGGGA", called.ReferenceAllele);
            Assert.Equal("CCTTT", called.AlternateAllele);
            Assert.Equal(28608285, called.ReferencePosition);
        }
Exemple #3
0
        public void CheckDeletionsWithAnchoring()
        {
            const int anchor = 28608285;

            // Scenario 1: one long deletion on the first site, anchored at that site.
            var sites = new VariantSite[]
            {
                new VariantSite(28608285)
                {
                    VcfReferenceAllele = "AGAAGTACTCATTATCTGT",
                    VcfAlternateAllele = "A"
                },
                new VariantSite(28608287)
            };

            var depthAtSites   = new int[] { 100, 200 };
            var noCallsAtSites = new int[] { 0, 0 };
            var countsAtSites  = new int[] { 90, 190 };

            var called = new Pisces.Domain.Models.Alleles.CalledAllele();

            var removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100, anchor);

            Assert.Equal(1, removedRefs.Count);

            Assert.Equal("AGAAGTACTCATTATCTGT", called.ReferenceAllele);
            Assert.Equal("A", called.AlternateAllele);
            Assert.Equal(28608285, called.ReferencePosition);

            // Scenario 2: two deletions interleaved with two ref-equals-alt sites.
            //
            // referenceSequence = "AGA[AG]TA[CTCAT]TATCTGAGGAGCCGGTCACCTGTACCA";
            // altSequence       = "AGA[XX]TA[XXXXX]TATCTGAGGAGCCGGTCACCTGTACCA";
            depthAtSites   = new int[] { 100, 200, 100, 200 };
            noCallsAtSites = new int[] { 0, 0, 0, 0 };
            countsAtSites  = new int[] { 90, 190, 10, 20 };

            sites = new VariantSite[]
            {
                new VariantSite(28608285) { VcfReferenceAllele = "A", VcfAlternateAllele = "A" },
                new VariantSite(28608287) { VcfReferenceAllele = "AAG", VcfAlternateAllele = "A" },
                new VariantSite(28608288) { VcfReferenceAllele = "A", VcfAlternateAllele = "A" },
                new VariantSite(28608291) { VcfReferenceAllele = "ACTCAT", VcfAlternateAllele = "A" }
            };

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100, anchor);

            Assert.Equal(5, removedRefs.Count);

            Assert.Equal("AGAAGTACTCAT", called.ReferenceAllele);
            Assert.Equal("AGATA", called.AlternateAllele);
            Assert.Equal(28608285, called.ReferencePosition);
        }
Exemple #4
0
        public void CheckSNVs()
        {
            var depthAtSites   = new int[] { 100, 200 };
            var noCallsAtSites = new int[] { 50, 100 };
            var countsAtSites  = new int[] { 90, 190 };

            var called = new Pisces.Domain.Models.Alleles.CalledAllele();

            // Scenario 1: SNV on the first site only. Coverage and no-call figures are checked too.
            var sites = new VariantSite[]
            {
                new VariantSite(28608285) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
                new VariantSite(28608287)
            };

            var removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(0, removedRefs.Count);
            Assert.Equal("A", called.ReferenceAllele);
            Assert.Equal("C", called.AlternateAllele);
            Assert.Equal(28608285, called.ReferencePosition);
            Assert.Equal(100, called.TotalCoverage);
            Assert.Equal(50, called.NumNoCalls);
            Assert.Equal((1f / 3f), called.FractionNoCalls);

            // Scenario 2: fresh sites, SNV on the second site only.
            sites = new VariantSite[]
            {
                new VariantSite(28608285),
                new VariantSite(28608287) { VcfReferenceAllele = "G", VcfAlternateAllele = "T" }
            };

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(0, removedRefs.Count);
            Assert.Equal("G", called.ReferenceAllele);
            Assert.Equal("T", called.AlternateAllele);
            Assert.Equal(28608287, called.ReferencePosition);

            // Scenario 3: same site objects as scenario 2, now with an SNV on the first site as well.
            sites[0].VcfReferenceAllele = "A";
            sites[0].VcfAlternateAllele = "C";

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(1, removedRefs.Count);
            Assert.Equal("AGG", called.ReferenceAllele);
            Assert.Equal("CGT", called.AlternateAllele);
            Assert.Equal(28608285, called.ReferencePosition);
        }
Exemple #5
0
        public void CheckMNVs()
        {
            var depthAtSites  = new List<int> { 100, 200 };
            var countsAtSites = new int[] { 90, 190 };

            var called = new Pisces.Domain.Models.Alleles.CalledAllele();

            // Scenario 1: MNV on the first site only.
            var sites = new VariantSite[]
            {
                new VariantSite(28608285) { VcfReferenceAllele = "AG", VcfAlternateAllele = "CC" },
                new VariantSite(28608287)
            };

            var removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(0, removedRefs.Count);
            Assert.Equal("AG", called.Reference);
            Assert.Equal("CC", called.Alternate);
            Assert.Equal(28608285, called.Coordinate);

            // Scenario 2: fresh sites, MNV on the second site only.
            sites = new VariantSite[]
            {
                new VariantSite(28608285),
                new VariantSite(28608287) { VcfReferenceAllele = "GA", VcfAlternateAllele = "TT" }
            };

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(0, removedRefs.Count);
            Assert.Equal("GA", called.Reference);
            Assert.Equal("TT", called.Alternate);
            Assert.Equal(28608287, called.Coordinate);

            // Scenario 3: same site objects as scenario 2, with the first MNV added back.
            sites[0].VcfReferenceAllele = "AG";
            sites[0].VcfAlternateAllele = "CC";

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(0, removedRefs.Count);
            Assert.Equal("AGGA", called.Reference);
            Assert.Equal("CCTT", called.Alternate);
            Assert.Equal(28608285, called.Coordinate);
        }
        /// <summary>
        /// Stores a rejected phased variant. If an entry for the same allele (per
        /// <c>IsSameAllele</c>) is already stored, that entry is replaced by the result of
        /// combining it with <paramref name="variant"/>; otherwise the variant is appended.
        /// </summary>
        /// <param name="variant">The rejected phased variant to record.</param>
        public void AddRejectedPhasedVariant(CalledAllele variant)
        {
            var existing = _rejectedPhasedVariants.Find(candidate => candidate.IsSameAllele(variant));

            if (existing != null)
            {
                // Replace the stored entry with the combined call; the combined call goes to the end of the list.
                var merged = PhasedVariantExtractor.CombinePhasedVariants(existing, variant, MaxQScore);
                _rejectedPhasedVariants.Remove(existing);
                _rejectedPhasedVariants.Add(merged);
                return;
            }

            _rejectedPhasedVariants.Add(variant);
        }
Exemple #7
0
        /// <summary>
        /// Extracts one phased allele per cluster, files it as accepted or rejected, records the
        /// reference counts absorbed by each accepted allele in <c>UsedRefCountsLookup</c>, and then
        /// recomputes <c>ReferenceSupport</c> for every <c>CalledAllele</c> in <c>CandidateVariants</c>.
        /// </summary>
        /// <param name="clusters">Clusters to process. A null or empty sequence makes this a no-op.</param>
        /// <param name="qNoiselevel">Forwarded unchanged to <c>PhasedVariantExtractor.Extract</c>.</param>
        /// <param name="maxQscore">Forwarded unchanged to <c>PhasedVariantExtractor.Extract</c>.</param>
        /// <param name="crushNbhd">
        /// When true (or when the genotype calculator's ploidy model is Diploid), extraction is
        /// anchored at <c>FirstPositionOfInterest</c>; otherwise -1 is passed as the anchor.
        /// </param>
        public void CreateMnvsFromClusters(IEnumerable<ICluster> clusters, int qNoiselevel, int maxQscore, bool crushNbhd = false)
        {
            // Any() stops at the first element; Count() would walk the whole sequence just to compare with zero.
            if (clusters == null || !clusters.Any())
            {
                return;
            }

            // Both arrays are produced by DepthAtSites, so no placeholder allocation is needed.
            int[] depthAtSites;
            int[] nocallsAtSites;

            DepthAtSites(clusters, out depthAtSites, out nocallsAtSites);

            Logger.WriteToLog("Creating MNVs from clusters.");

            int anchorPosition = -1;

            // If we are crushing the vcf, or in diploid mode, always report all phased alleles through the nbhd,
            // starting at the first position of interest (i.e., the first position we started phasing on).
            // If we are in somatic mode or uncrushed mode, we just report the variants at the loci we find them on (normal Pisces).
            if (crushNbhd || _nbhdGTcalculator.PloidyModel == Pisces.Domain.Types.PloidyModel.Diploid)
            {
                anchorPosition = FirstPositionOfInterest;
            }

            foreach (var cluster in clusters)
            {
                CalledAllele mnv;

                var clusterConsensus = cluster.GetConsensusSites();

                Logger.WriteToLog(cluster.Name + "\tVariantSites\t" + VariantSite.ArrayToString(clusterConsensus));
                Logger.WriteToLog(cluster.Name + "\tVariantPositions\t" + VariantSite.ArrayToPositions(clusterConsensus));

                var referenceRemoval = PhasedVariantExtractor.Extract(out mnv, clusterConsensus,
                                                                      ReferenceSequence, depthAtSites, nocallsAtSites, cluster.CountsAtSites, ReferenceName, qNoiselevel, maxQscore, anchorPosition);

                if ((mnv.Type != Pisces.Domain.Types.AlleleCategory.Reference) && mnv.AlleleSupport != 0)
                {
                    Logger.WriteToLog(cluster.Name + "mnv accepted:\t" + mnv.ToString());
                    AddAcceptedPhasedVariant(mnv);

                    // Keep track of reference calls sucked into MNVs.
                    // We will need to subtract this from the ref counts when we write out the final vcf.
                    foreach (var refPosition in referenceRemoval.Keys)
                    {
                        // Single lookup: fetch the existing record, or create one claimed by this MNV.
                        SuckedUpRefRecord usedRecord;
                        if (!UsedRefCountsLookup.TryGetValue(refPosition, out usedRecord))
                        {
                            usedRecord = new SuckedUpRefRecord()
                            {
                                Counts = 0, AlleleThatClaimedIt = mnv
                            };
                            UsedRefCountsLookup.Add(refPosition, usedRecord);
                        }

                        usedRecord.Counts += referenceRemoval[refPosition].Counts;
                    }
                }
                else if (mnv.TotalCoverage != 0) // don't add empty stuff
                {
                    Logger.WriteToLog("mnv rejected:\t" + mnv.ToString());
                    AddRejectedPhasedVariant(mnv);
                }
            }

            foreach (var phasedVariant in CandidateVariants)
            {
                var calledPhasedVariant = phasedVariant as CalledAllele;
                if (calledPhasedVariant == null)
                {
                    continue;
                }

                calledPhasedVariant.ReferenceSupport = phasedVariant.TotalCoverage - phasedVariant.AlleleSupport;

                // Subtract absorbed reference counts unless this variant is the one that claimed the position.
                SuckedUpRefRecord claimedRecord;
                if (UsedRefCountsLookup.TryGetValue(phasedVariant.ReferencePosition, out claimedRecord) && (claimedRecord.AlleleThatClaimedIt != phasedVariant))
                {
                    calledPhasedVariant.ReferenceSupport = calledPhasedVariant.ReferenceSupport - claimedRecord.Counts;
                }

                // Never report negative reference support.
                calledPhasedVariant.ReferenceSupport = Math.Max(0, calledPhasedVariant.ReferenceSupport);
            }
        }
Exemple #8
0
        /// <summary>
        /// Extracts one phased allele per cluster, files it as accepted or rejected, accumulates the
        /// reference counts absorbed by accepted alleles in <c>UsedRefCountsLookup</c>, and then
        /// recomputes <c>ReferenceSupport</c> for every <c>CalledAllele</c> in <c>CandidateVariants</c>.
        /// </summary>
        /// <param name="clusters">Clusters to process. A null or empty sequence makes this a no-op.</param>
        /// <param name="qNoiselevel">Forwarded unchanged to <c>PhasedVariantExtractor.Extract</c>.</param>
        /// <param name="maxQscore">Forwarded unchanged to <c>PhasedVariantExtractor.Extract</c>.</param>
        /// <param name="crushNbhd">
        /// When true, the reference position of the first consensus site of the first cluster
        /// becomes the anchor for every extraction; otherwise -1 is passed as the anchor.
        /// </param>
        public void AddMnvsFromClusters(IEnumerable<ICluster> clusters, int qNoiselevel, int maxQscore, bool crushNbhd = false)
        {
            // Any() stops at the first element; Count() would walk the whole sequence just to compare with zero.
            if (clusters == null || !clusters.Any())
            {
                return;
            }

            var depthAtSites = DepthAtSites(clusters);

            Logger.WriteToLog("Creating MNVs from clusters.");

            int anchorPosition = -1;

            foreach (var cluster in clusters)
            {
                CalledAllele mnv;

                var clusterConsensus = cluster.GetConsensusSites();

                // The anchor is fixed once, from the first cluster seen, and reused for the rest.
                if (crushNbhd && (anchorPosition == -1))
                {
                    anchorPosition = clusterConsensus.First().VcfReferencePosition;
                }

                Logger.WriteToLog(cluster.Name + "\tVariantSites\t" + VariantSite.ArrayToString(clusterConsensus));
                Logger.WriteToLog(cluster.Name + "\tVariantPositions\t" + VariantSite.ArrayToPositions(clusterConsensus));

                var referenceRemoval = PhasedVariantExtractor.Extract(out mnv, clusterConsensus,
                                                                      ReferenceSequence, depthAtSites.ToList(), cluster.CountsAtSites, ReferenceName, qNoiselevel, maxQscore, anchorPosition);

                if ((mnv.Type != Pisces.Domain.Types.AlleleCategory.Reference) && mnv.AlleleSupport != 0)
                {
                    Logger.WriteToLog(cluster.Name + "mnv accepted:\t" + mnv.ToString());
                    AddAcceptedPhasedVariant(mnv);

                    // Keep track of reference calls sucked into MNVs.
                    // We will need to subtract this from the ref counts when we write out the final vcf.
                    foreach (var refPosition in referenceRemoval.Keys)
                    {
                        // TryGetValue leaves the running total at 0 when the position has not been seen yet.
                        int alreadyUsed;
                        UsedRefCountsLookup.TryGetValue(refPosition, out alreadyUsed);
                        UsedRefCountsLookup[refPosition] = alreadyUsed + referenceRemoval[refPosition];
                    }
                }
                else if (mnv.TotalCoverage != 0) // don't add empty stuff
                {
                    Logger.WriteToLog("mnv rejected:\t" + mnv.ToString());
                    AddRejectedPhasedVariant(mnv);
                }
            }

            foreach (var phasedVariant in CandidateVariants)
            {
                var calledPhasedVariant = phasedVariant as CalledAllele;
                if (calledPhasedVariant == null)
                {
                    continue;
                }

                calledPhasedVariant.ReferenceSupport = phasedVariant.TotalCoverage - phasedVariant.AlleleSupport;

                // Subtract the reference counts already absorbed at this coordinate, if any.
                int usedRefCount;
                if (UsedRefCountsLookup.TryGetValue(phasedVariant.Coordinate, out usedRefCount))
                {
                    calledPhasedVariant.ReferenceSupport = calledPhasedVariant.ReferenceSupport - usedRefCount;
                }

                // Never report negative reference support.
                calledPhasedVariant.ReferenceSupport = Math.Max(0, calledPhasedVariant.ReferenceSupport);
            }
        }
Exemple #9
0
        public void CheckSNVsWithAnchoring()
        {
            // Every extraction in this test is anchored at 28608285 (the last argument to Extract).
            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();
            var clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            var neighborhoodDepthAtSites   = new int[] { 100, 200 };
            var neighborhoodNoCallsAtSites = new int[] { 0, 0 };
            var clusterCountsAtSites       = new int[] { 90, 190 };

            // an example where only the first VS is a real variant
            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "C";

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);


            Assert.Equal(0, refsToRemove.Count);
            Assert.Equal("A", allele.ReferenceAllele);
            Assert.Equal("C", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);

            // an example where the first VS is left at its default and only the second is a real variant
            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            clusterVariantSites[1].VcfReferenceAllele = "G";
            clusterVariantSites[1].VcfAlternateAllele = "T";


            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            // The call still starts at the anchor, so the two leading reference bases are claimed.
            Assert.Equal(2, refsToRemove.Count);
            Assert.Equal(190, refsToRemove[28608285].Counts);
            Assert.Equal(190, refsToRemove[28608286].Counts);
            Assert.Equal("AGG", allele.ReferenceAllele);
            Assert.Equal("AGT", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);

            // an example where there are two real VS
            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "C";

            clusterVariantSites[1].VcfReferenceAllele = "G";
            clusterVariantSites[1].VcfAlternateAllele = "T";


            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            Assert.Equal(1, refsToRemove.Count);
            Assert.Equal(140, refsToRemove[28608286].Counts); // (190+90)/2
            Assert.Equal("AGG", allele.ReferenceAllele);
            Assert.Equal("CGT", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);

            // an example where the bases between two real VS are reference (the middle VS is N>N)
            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287), new VariantSite(28608288)
            };

            neighborhoodDepthAtSites   = new int[] { 100, 200, 300 };
            neighborhoodNoCallsAtSites = new int[] { 0, 0, 0 };
            clusterCountsAtSites       = new int[] { 90, 190, 20 };

            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "C";

            clusterVariantSites[1].VcfReferenceAllele = "N";
            clusterVariantSites[1].VcfAlternateAllele = "N";

            clusterVariantSites[2].VcfReferenceAllele = "G";
            clusterVariantSites[2].VcfAlternateAllele = "T";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            // Two reference positions are claimed. The second assertion used to repeat the
            // ...286 check, which left the other claimed position unverified.
            // NOTE(review): 28608287 is the remaining interior position of AGAG at ...285;
            // confirm against PhasedVariantExtractor that its count is also 55.
            Assert.Equal(2, refsToRemove.Count);
            Assert.Equal(55, refsToRemove[28608286].Counts); // (90+20)/2
            Assert.Equal(55, refsToRemove[28608287].Counts); // (90+20)/2
            Assert.Equal("AGAG", allele.ReferenceAllele);
            Assert.Equal("CGAT", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);
        }
Exemple #10
0
        public void CheckDeletionsInHomopolymerStretches()
        {
            // The same three site objects are reused for all five scenarios; only their
            // ref/alt alleles are rewritten before each extraction.

            string homopolymerReference      = "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT";
            //Where deletions occur:         = "TTTTXXXTTTXXXXXXTTTTTTTTTTTTTTTTTTTTTT";
            //Expected result (scenario 1):  = TTTTTTTTTT -> T

            var sites = new VariantSite[]
            {
                new VariantSite(28608285),
                new VariantSite(28608288),
                new VariantSite(28608294)
            };

            var depthAtSites   = new int[] { 100, 200, 200 };
            var noCallsAtSites = new int[] { 0, 0, 0 };
            var countsAtSites  = new int[] { 90, 190, 190 };

            var called = new Pisces.Domain.Models.Alleles.CalledAllele();

            //(1) reference at the first site, followed by two deletions

            sites[0].VcfReferenceAllele = "T";
            sites[0].VcfAlternateAllele = "T";

            sites[1].VcfReferenceAllele = "TTTT";
            sites[1].VcfAlternateAllele = "T";

            sites[2].VcfReferenceAllele = "TTTTTTT";
            sites[2].VcfAlternateAllele = "T";

            var removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, homopolymerReference,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal("TTTTTTTTTT", called.ReferenceAllele);
            Assert.Equal("T", called.AlternateAllele);
            Assert.Equal(28608288, called.ReferencePosition);

            //(2) A similar, contrived case (N's instead of ref) that would cause the problem.

            sites[0].VcfReferenceAllele = "N";
            sites[0].VcfAlternateAllele = "N";

            sites[1].VcfReferenceAllele = "TTTT";
            sites[1].VcfAlternateAllele = "T";

            sites[2].VcfReferenceAllele = "TTTTTTT";
            sites[2].VcfAlternateAllele = "T";

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, homopolymerReference,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal("TTTTTTTTTT", called.ReferenceAllele);
            Assert.Equal("T", called.AlternateAllele);
            Assert.Equal(28608288, called.ReferencePosition);

            //(3) an inserted T at the first site, followed by the two deletions

            sites[0].VcfReferenceAllele = "G";
            sites[0].VcfAlternateAllele = "GT";

            sites[1].VcfReferenceAllele = "TTTT";
            sites[1].VcfAlternateAllele = "T";

            sites[2].VcfReferenceAllele = "TTTTTTT";
            sites[2].VcfAlternateAllele = "T";

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, homopolymerReference,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal("TTTTTTTTT", called.ReferenceAllele); // <- (note, ref allele now has one less T)
            Assert.Equal("T", called.AlternateAllele);         //
            Assert.Equal(28608285, called.ReferencePosition);  // left shifting, all the insertion joins to the first variant

            //(4) an inserted A at the first site, followed by the two deletions

            sites[0].VcfReferenceAllele = "G";
            sites[0].VcfAlternateAllele = "GA";

            sites[1].VcfReferenceAllele = "TTTT";
            sites[1].VcfAlternateAllele = "T";

            sites[2].VcfReferenceAllele = "TTTTTTT";
            sites[2].VcfAlternateAllele = "T";

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, homopolymerReference,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal("TTTTTTTTT", called.ReferenceAllele);
            Assert.Equal("A", called.AlternateAllele);
            Assert.Equal(28608286, called.ReferencePosition);

            //(5) two inserted T's at the first site, followed by the two deletions

            sites[0].VcfReferenceAllele = "T";
            sites[0].VcfAlternateAllele = "TTT";

            sites[1].VcfReferenceAllele = "TTTT";
            sites[1].VcfAlternateAllele = "T";

            sites[2].VcfReferenceAllele = "TTTTTTT";
            sites[2].VcfAlternateAllele = "T";

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, homopolymerReference,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal("TTTTTTTT", called.ReferenceAllele);
            Assert.Equal("T", called.AlternateAllele);
            Assert.Equal(28608285, called.ReferencePosition);
        }
Exemple #11
0
        public void CheckDeletions()
        {
            var depthAtSites   = new int[] { 100, 200 };
            var noCallsAtSites = new int[] { 0, 0 };
            var countsAtSites  = new int[] { 90, 190 };

            var called = new Pisces.Domain.Models.Alleles.CalledAllele();

            // Scenario 1: a single long deletion on the first site, no anchoring.
            var sites = new VariantSite[]
            {
                new VariantSite(28608285)
                {
                    VcfReferenceAllele = "AGAAGTACTCATTATCTGA",
                    VcfAlternateAllele = "A"
                },
                new VariantSite(28608287)
            };

            var removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(0, removedRefs.Count);

            Assert.Equal("AGAAGTACTCATTATCTGA", called.ReferenceAllele);
            Assert.Equal("A", called.AlternateAllele);
            Assert.Equal(28608285, called.ReferencePosition);

            // Scenario 2: testing a real bug
            // G>G T>T TTG>T ATG>A .
            // mnv accepted:	chr5    176517113.GTCCGTATG   CCGTA.

            /*
             * chr5    176517099.T   TTG 86
             * chr5    176517099.TTG T   55
             * chr5    176517100.T.
             * chr5    176517101.G.   100 PASS DP = 298  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:283:298:0.0503:20:-100.0000:0.0165
             * chr5    176517102.T.   100 PASS DP = 296  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:294:296:0.0068:20:-100.0000:0.0199
             * chr5    176517103.G.   100 PASS DP = 302  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:301:302:0.0033:20:-100.0000:0.0098
             * chr5    176517104.T.   100 PASS DP = 295  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:293:295:0.0068:20:-100.0000:0.0232
             * chr5    176517105.G.   100 PASS DP = 297  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:297:297:0.0000:20:-100.0000:0.0166
             * chr5    176517106.T.   100 PASS DP = 294  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:293:294:0.0034:20:-100.0000:0.0265
             * chr5    176517107.G.   100 PASS DP = 301  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:301:301:0.0000:20:-100.0000:0.0033
             * chr5    176517108.T.   100 PASS DP = 293  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:293:293:0.0000:20:-100.0000:0.0298
             * chr5    176517109.G.   100 PASS DP = 301  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:301:301:0.0000:20:-100.0000:0.0066
             * chr5    176517110.T.   100 PASS DP = 287  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:286:287:0.0035:20:-100.0000:0.0559
             * chr5    176517111.G.   100 PASS DP = 300  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:298:300:0.0067:20:-100.0000:0.0066
             * chr5    176517112.T.   100 PASS DP = 293  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:292:293:0.0034:20:-100.0000:0.0201
             * chr5    176517113.G.   100 PASS DP = 289  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:288:289:0.0035:20:-100.0000:0.0137
             * chr5    176517114.T.   100 PASS DP = 280  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:279:280:0.0036:20:-100.0000:0.0378
             * chr5    176517115.C.   100 PASS DP = 257  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:255:257:0.0078:20:-100.0000:0.1076
             * chr5    176517116.C.   100 LowDP DP = 222  GT: GQ: AD: DP: VF: NL: SB: NC./.:100:220:222:0.0090:20:-100.0000:0.1898
             * chr5    176517117.G.   100 PASS DP = 262  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:262:262:0.0000:20:-100.0000:0.0260
             * chr5    176517118.T.   100 PASS DP = 257  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 0:100:257:257:0.0000:20:-100.0000:0.0410
             * chr5    176517119.ATG A   64  PASS DP = 251  GT: GQ: AD: DP: VF: NL: SB: NC 0 / 1:64:237,14:251:0.0558:20:-20.0580:0.0000
             */

            sites = new VariantSite[]
            {
                new VariantSite(176517098) { VcfReferenceAllele = "G", VcfAlternateAllele = "G" },
                new VariantSite(176517099) { VcfReferenceAllele = "T", VcfAlternateAllele = "T" },
                new VariantSite(176517099) { VcfReferenceAllele = "TTG", VcfAlternateAllele = "T" },
                new VariantSite(176517119) { VcfReferenceAllele = "ATG", VcfAlternateAllele = "A" }
            };

            depthAtSites   = new int[] { 100, 200, 100, 200 };
            noCallsAtSites = new int[] { 0, 0, 0, 0 };
            countsAtSites  = new int[] { 90, 190, 90, 90 };

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, referenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(18, removedRefs.Count);
            Assert.Equal("TGGTACTCATTATCTGAGGATG", called.ReferenceAllele);
            Assert.Equal("GTACTCATTATCTGAGGA", called.AlternateAllele);
            Assert.Equal(176517100, called.ReferencePosition);

            // Scenario 3: same sites, but now suppose we had 7x"TG" + "TCCGT" in between, instead of "R"
            string realReferenceSequence = "GTTGTGTGTGTGTGTG" + "TCCGT" + "ATG";

            //the ref would be like this: "TGTGTGTGTGTGTGTCCGTATG"
            //the alt would be like this: "TGTGTGTGTGTGTCCGTA"
            //starting at position  176517100

            //but Scylla would clean it up
            //the ref would be like this: "-------------GTCCGTATG"
            //the alt would be like this: "------------CCGTA"
            //starting at position  176517100 + 1(mnv style reporting) + 12 (where the alt agreed with the reference)

            removedRefs = PhasedVariantExtractor.Extract(
                out called, sites, realReferenceSequence,
                depthAtSites, noCallsAtSites, countsAtSites,
                chromosome, 20, 100);

            Assert.Equal(18 - 12, removedRefs.Count);
            Assert.Equal("GTCCGTATG", called.ReferenceAllele);
            Assert.Equal("CCGTA", called.AlternateAllele);
            Assert.Equal(176517113, called.ReferencePosition);
        }
Exemple #12
0
        // Regression scenarios for phased insertions that sit inside an all-T reference stretch.
        // Each numbered case re-assigns the alleles on the same three sites, runs
        // PhasedVariantExtractor.Extract, and pins the allele handed back through the out parameter.
        public void CheckInsertionsInHomopolymerStretches()
        {
            string homopolymerReference = "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT";
            var    combined = new Pisces.Domain.Models.Alleles.CalledAllele();

            var depths  = new[] { 100, 200, 200 };
            var noCalls = new[] { 0, 0, 0 };
            var counts  = new[] { 90, 190, 190 };

            // (1) The exact case of the original bug: a reference-looking site followed by
            //     two T insertions.
            var sites = new[] {
                new VariantSite(28608285) { VcfReferenceAllele = "T", VcfAlternateAllele = "T" },
                new VariantSite(28608288) { VcfReferenceAllele = "T", VcfAlternateAllele = "TTTT" },
                new VariantSite(28608289) { VcfReferenceAllele = "T", VcfAlternateAllele = "TTTTTTT" }
            };

            var extracted = PhasedVariantExtractor.Extract(
                out combined, sites, homopolymerReference,
                depths, noCalls, counts, chromosome, 20, 100);

            Assert.Equal("T", combined.ReferenceAllele);
            Assert.Equal("TTTTTTTTTT", combined.AlternateAllele);
            Assert.Equal(28608288, combined.ReferencePosition);

            // (2) A similar, contrived case: the first site carries N's instead of the reference base.
            //     The two insertion sites are re-assigned explicitly before every call.
            sites[0].VcfReferenceAllele = "N";
            sites[0].VcfAlternateAllele = "N";

            sites[1].VcfReferenceAllele = "T";
            sites[1].VcfAlternateAllele = "TTTT";

            sites[2].VcfReferenceAllele = "T";
            sites[2].VcfAlternateAllele = "TTTTTTT";

            extracted = PhasedVariantExtractor.Extract(
                out combined, sites, homopolymerReference,
                depths, noCalls, counts, chromosome, 20, 100);

            Assert.Equal("T", combined.ReferenceAllele);
            Assert.Equal("TTTTTTTTTT", combined.AlternateAllele);
            Assert.Equal(28608288, combined.ReferencePosition);

            // (3) Another contrived case: the first site is itself a one-base T insertion.
            sites[0].VcfReferenceAllele = "G";
            sites[0].VcfAlternateAllele = "GT";

            sites[1].VcfReferenceAllele = "T";
            sites[1].VcfAlternateAllele = "TTTT";

            sites[2].VcfReferenceAllele = "T";
            sites[2].VcfAlternateAllele = "TTTTTTT";

            extracted = PhasedVariantExtractor.Extract(
                out combined, sites, homopolymerReference,
                depths, noCalls, counts, chromosome, 20, 100);

            Assert.Equal("T", combined.ReferenceAllele);
            Assert.Equal("TTTTTTTTTTT", combined.AlternateAllele); // one more T than in cases (1) and (2)
            Assert.Equal(28608285, combined.ReferencePosition);    // left-shifted: the whole insertion is reported at the first site

            // (4) A case that should NOT trigger the problem: the inserted A is not part of the
            //     T repeat in the reference sequence.
            sites[0].VcfReferenceAllele = "G";
            sites[0].VcfAlternateAllele = "GA";

            sites[1].VcfReferenceAllele = "T";
            sites[1].VcfAlternateAllele = "TTTT";

            sites[2].VcfReferenceAllele = "T";
            sites[2].VcfAlternateAllele = "TTTTTTT";

            extracted = PhasedVariantExtractor.Extract(
                out combined, sites, homopolymerReference,
                depths, noCalls, counts, chromosome, 20, 100);

            Assert.Equal("T", combined.ReferenceAllele);
            Assert.Equal("TATTTTTTTTT", combined.AlternateAllele);
            Assert.Equal(28608285, combined.ReferencePosition);

            // (5) Another case that might trigger the problem: a two-base deletion at the first
            //     site ahead of the two insertions.
            sites[0].VcfReferenceAllele = "TTT";
            sites[0].VcfAlternateAllele = "T";

            sites[1].VcfReferenceAllele = "T";
            sites[1].VcfAlternateAllele = "TTTT";

            sites[2].VcfReferenceAllele = "T";
            sites[2].VcfAlternateAllele = "TTTTTTT";

            extracted = PhasedVariantExtractor.Extract(
                out combined, sites, homopolymerReference,
                depths, noCalls, counts, chromosome, 20, 100);

            Assert.Equal("T", combined.ReferenceAllele);
            Assert.Equal("TTTTTTTT", combined.AlternateAllele);
            Assert.Equal(28608285, combined.ReferencePosition);
        }
Exemple #13
0
        // Insertion scenarios run with anchoring: every Extract call below passes 28608285 as the
        // trailing argument (the anchor position). Each scenario builds its own site array, calls
        // PhasedVariantExtractor.Extract and pins the returned reference count and combined allele.
        public void CheckInsertionsWorkWithAnchoring()
        {
            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();
            var clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            var neighborhoodDepthAtSites   = new int[] { 100, 200 };
            var neighborhoodNoCallsAtSites = new int[] { 0, 0 };
            var clusterCountsAtSites       = new int[] { 90, 190 };

            //check a single insertion at the anchor position
            //(no alleles are assigned to the second site in this scenario)

            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "AGAAGTACTCATTATCTGT";

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            //one entry is expected here (an earlier expectation of 0 had been disabled in place)
            Assert.Equal(1, refsToRemove.Count); //28608285, 90

            Assert.Equal("A", allele.ReferenceAllele);
            Assert.Equal("AGAAGTACTCATTATCTGT", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);


            //check co-located insertions (an SNV and an insertion at the same position)

            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608285)
            };

            clusterVariantSites[0].VcfReferenceAllele = "C";
            clusterVariantSites[1].VcfReferenceAllele = "C";

            clusterVariantSites[0].VcfAlternateAllele = "T";
            clusterVariantSites[1].VcfAlternateAllele = "CGTA";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            Assert.Equal(0, refsToRemove.Count);

            Assert.Equal("C", allele.ReferenceAllele);
            Assert.Equal("TGTA", allele.AlternateAllele); //this only comes out correct so long as the VS are ordered correctly in the list.
            Assert.Equal(28608285, allele.ReferencePosition);


            //check a non-variant site (ref == alt) directly followed by an insertion at the next position

            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608286)
            };

            clusterVariantSites[0].VcfReferenceAllele = "C";
            clusterVariantSites[0].VcfAlternateAllele = "C";

            clusterVariantSites[1].VcfReferenceAllele = "C";
            clusterVariantSites[1].VcfAlternateAllele = "CGTA";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            Assert.Equal(2, refsToRemove.Count);

            Assert.Equal("AG", allele.ReferenceAllele);
            Assert.Equal("AGGTA", allele.AlternateAllele); //this only comes out correct so long as the VS are ordered correctly in the list.
            Assert.Equal(28608285, allele.ReferencePosition);


            //check a mix of insertions and references

            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608286),
                new VariantSite(28608288), new VariantSite(28608290), new VariantSite(28608291)
            };

            clusterVariantSites[0].VcfReferenceAllele = "C";
            clusterVariantSites[0].VcfAlternateAllele = "C";

            clusterVariantSites[1].VcfReferenceAllele = "C";
            clusterVariantSites[1].VcfAlternateAllele = "C";

            clusterVariantSites[2].VcfReferenceAllele = "C";
            clusterVariantSites[2].VcfAlternateAllele = "CGTA";

            clusterVariantSites[3].VcfReferenceAllele = "C";
            clusterVariantSites[3].VcfAlternateAllele = "CCATCAT";

            clusterVariantSites[4].VcfReferenceAllele = "C";
            clusterVariantSites[4].VcfAlternateAllele = "C";


            //the per-site arrays are rebuilt so that they have one entry per site (five here)
            neighborhoodDepthAtSites   = new int[] { 100, 200, 100, 200, 200 };
            neighborhoodNoCallsAtSites = new int[] { 0, 0, 0, 0, 0 };
            clusterCountsAtSites       = new int[] { 90, 190, 20, 20, 20 };

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);


            //referenceSequence = "AGAA-GT-ACTCATTATCTGAGGAGCCGGTCACCTGTACCA";

            //with insertions = "AGAA[GTA]GT[CATCAT]ACTCATTATCTGAGGAGCCGGTCACCTGTACCA";

            Assert.Equal(6, refsToRemove.Count);

            Assert.Equal("AGAAG", allele.ReferenceAllele);
            Assert.Equal("AGAAGTAGTCATCA", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);
        }
Exemple #14
0
        // Insertion scenarios for PhasedVariantExtractor.Extract, called here without a trailing
        // anchor-position argument. Each scenario assigns VCF alleles to a fresh pair of sites,
        // runs Extract, and pins the reference count it returns plus the allele delivered through
        // the out parameter. The depth / no-call / count arrays are shared by all scenarios.
        public void CheckInsertions()
        {
            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();
            var clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            var neighborhoodDepthAtSites   = new int[] { 100, 200 };
            var neighborhoodNoCallsAtSites = new int[] { 0, 0 };
            var clusterCountsAtSites       = new int[] { 90, 190 };

            //check a single insertion on the first site
            //(no alleles are assigned to the second site in this scenario)
            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "AGAAGTACTCATTATCTGA";

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);

            Assert.Equal(0, refsToRemove.Count);

            Assert.Equal("A", allele.ReferenceAllele);
            Assert.Equal("AGAAGTACTCATTATCTGA", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);


            //check co-located insertions (an SNV listed first, then an insertion, same position)

            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608285)
            };

            clusterVariantSites[0].VcfReferenceAllele = "C";
            clusterVariantSites[1].VcfReferenceAllele = "C";

            clusterVariantSites[0].VcfAlternateAllele = "T";
            clusterVariantSites[1].VcfAlternateAllele = "CGTA";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);

            Assert.Equal(0, refsToRemove.Count);

            Assert.Equal("C", allele.ReferenceAllele);
            Assert.Equal("TGTA", allele.AlternateAllele); //this only comes out correct so long as the VS are ordered correctly in the list.
            Assert.Equal(28608285, allele.ReferencePosition);

            //same two co-located variants as above, rebuilt on fresh sites
            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608285)
            };

            clusterVariantSites[0].VcfReferenceAllele = "C";
            clusterVariantSites[1].VcfReferenceAllele = "C";

            //here we put the alleles in the wrong order with the insertion first.
            clusterVariantSites[0].VcfAlternateAllele = "CGTA";
            clusterVariantSites[1].VcfAlternateAllele = "T";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);

            Assert.Equal(0, refsToRemove.Count);

            //note that now the MNV differs from the previous example (where it was correct).
            //This demonstrates an assumption of the PhasedVariantExtractor.Extract
            //algorithm: the VS must be in order of their true position (first base of difference).
            //NOTE(review): the original remark also called the position wrong, but the position
            //asserted below equals the one asserted in the previous example - confirm which is stale.

            Assert.Equal("A", allele.ReferenceAllele);
            Assert.Equal("AGTA", allele.AlternateAllele); //old bug.
            Assert.Equal(28608285, allele.ReferencePosition);

            //check colocated insertions with repeats inside them

            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608285)
            };

            clusterVariantSites[0].VcfReferenceAllele = "T";
            clusterVariantSites[1].VcfReferenceAllele = "T";

            //both sites are T insertions here (5 and 8 inserted bases respectively);
            //NOTE(review): the original carried a copy-pasted "wrong order, insertion first" remark at this spot.
            clusterVariantSites[0].VcfAlternateAllele = "TTTTTT";
            clusterVariantSites[1].VcfAlternateAllele = "TTTTTTTTT";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);

            Assert.Equal(0, refsToRemove.Count);

            //the expected alternate is "A" followed by 13 T's, i.e. both insertions (5 + 8 bases) combined.
            //NOTE(review): the original repeated the "MNV and position are wrong / VS must be ordered"
            //remark here verbatim; it is not evident from this test that it applies to this scenario.

            Assert.Equal("A", allele.ReferenceAllele);
            Assert.Equal("ATTTTTTTTTTTTT", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);

            //
            //(6) Check insertions with ambiguous trimming on each side
            //This example creates a G-> GGAAGGG allele
            //that trims to {} -> GGAAGG allele
            //And then the reference "A" gets repadded.

            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608286)
            };

            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "AGGAA";

            //second insertion, at the next position (the sites are listed in ascending position order)
            clusterVariantSites[1].VcfReferenceAllele = "G";
            clusterVariantSites[1].VcfAlternateAllele = "GGG";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);

            Assert.Equal(0, refsToRemove.Count);

            //the combined allele is expected as a single insertion reported at the first site.
            //NOTE(review): the original repeated the "MNV and position are wrong" remark here verbatim;
            //it looks copy-pasted from the out-of-order example above - confirm before relying on it.

            Assert.Equal("A", allele.ReferenceAllele);
            Assert.Equal("AGGAAGG", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);
        }
Exemple #15
0
        // Insertion scenarios for the Extract call shape that takes a depth list and a count array
        // (no no-call array). The combined allele is checked through its Reference, Alternate and
        // Coordinate members.
        public void CheckInsertions()
        {
            var depths = new List <int> {
                100, 200
            };
            var counts = new[] { 90, 190 };
            var phased = new Pisces.Domain.Models.Alleles.CalledAllele();

            // Scenario 1: one long insertion on the first site; no alleles are assigned to the second site.
            var sites = new[] {
                new VariantSite(28608285) {
                    VcfReferenceAllele = "A",
                    VcfAlternateAllele = "AGAAGTACTCATTATCTGA"
                },
                new VariantSite(28608287)
            };

            var extracted = PhasedVariantExtractor.Extract(
                out phased, sites, referenceSequence,
                depths, counts, chromosome, 20, 100);

            Assert.Equal(0, extracted.Count);

            Assert.Equal("A", phased.Reference);
            Assert.Equal("AGAAGTACTCATTATCTGA", phased.Alternate);
            Assert.Equal(28608285, phased.Coordinate);


            // Scenario 2: co-located variants, the SNV listed before the insertion.
            sites = new[] {
                new VariantSite(28608285) { VcfReferenceAllele = "C", VcfAlternateAllele = "T" },
                new VariantSite(28608285) { VcfReferenceAllele = "C", VcfAlternateAllele = "CGTA" }
            };

            extracted = PhasedVariantExtractor.Extract(
                out phased, sites, referenceSequence,
                depths, counts, chromosome, 20, 100);

            Assert.Equal(0, extracted.Count);

            Assert.Equal("C", phased.Reference);
            Assert.Equal("TGTA", phased.Alternate); // correct only because the sites are listed in the right order
            Assert.Equal(28608285, phased.Coordinate);

            // Scenario 3: the same two variants, deliberately listed in the wrong order
            // (insertion first, SNV second).
            sites = new[] {
                new VariantSite(28608285) { VcfReferenceAllele = "C", VcfAlternateAllele = "CGTA" },
                new VariantSite(28608285) { VcfReferenceAllele = "C", VcfAlternateAllele = "T" }
            };

            extracted = PhasedVariantExtractor.Extract(
                out phased, sites, referenceSequence,
                depths, counts, chromosome, 20, 100);

            Assert.Equal(0, extracted.Count);

            // The MNV and its position now differ from scenario 2, where they were correct.
            // This demonstrates an assumption of the PhasedVariantExtractor.Extract algorithm:
            // the sites must be ordered by their true position (first base of difference).
            // NOTE(review): the original remark said this "used to come out as GTAT", yet GTAT is
            // exactly what is asserted - confirm whether the remark or the expectation is stale.
            Assert.Equal("C", phased.Reference);
            Assert.Equal("GTAT", phased.Alternate);
            Assert.Equal(28608286, phased.Coordinate);
        }