Exemple #1
0
        /// <summary>
        /// Rebuilds a <c>PreIndel</c> from a hashable indel key, merges all evidence records
        /// collected for it into a single <c>IndelEvidence</c>, re-runs
        /// <c>ExtractIndelsFromEntry</c> on the merged evidence and appends whatever it returns
        /// to <paramref name="indelsToAdd"/>.
        /// </summary>
        /// <param name="allowRescue">Forwarded unchanged to <c>ExtractIndelsFromEntry</c>.</param>
        /// <param name="indelToRecalculate">The indel key together with every evidence record gathered for it.</param>
        /// <param name="statusCounter">Forwarded unchanged to <c>ExtractIndelsFromEntry</c>.</param>
        /// <param name="edgeThreshold">Forwarded unchanged to <c>ExtractIndelsFromEntry</c>.</param>
        /// <param name="indelsToAdd">Output list; only appended to, and only when extraction returns a non-null list.</param>
        private void RecalculateIndelAndAddIfNeeded(bool allowRescue, KeyValuePair <HashableIndel, List <IndelEvidence> > indelToRecalculate,
                                                    IndelStatusCounter statusCounter, double edgeThreshold, List <PreIndel> indelsToAdd)
        {
            var source    = indelToRecalculate.Key;
            var candidate = new CandidateAllele(source.Chromosome, source.ReferencePosition,
                                                source.ReferenceAllele, source.AlternateAllele, source.Type);

            // Carry the multi-indel linkage over from the hashable key.
            var rebuilt = new PreIndel(candidate)
            {
                InMulti    = source.InMulti,
                OtherIndel = source.OtherIndel
            };

            // Fold every evidence record for this indel into one accumulator.
            var combinedEvidence = new IndelEvidence();

            foreach (var evidenceRecord in indelToRecalculate.Value)
            {
                combinedEvidence.AddIndelEvidence(evidenceRecord);
            }

            // Key string has the form "<indel>|<other indel>".
            var entryKey   = rebuilt.ToString() + "|" + rebuilt.OtherIndel;
            var candidates = new List <PreIndel> { rebuilt };
            var survivors  = ExtractIndelsFromEntry(combinedEvidence, entryKey, statusCounter, edgeThreshold,
                                                    allowRescue, candidates);

            // A null result means the entry was filtered out.
            if (survivors == null)
            {
                return;
            }

            indelsToAdd.AddRange(survivors);
        }
 /// <summary>
 /// Asserts that an indel key and a subset of its evidence (forward/reverse counts, left/right
 /// anchors and reputable support) match the expected values. Other evidence fields are not compared.
 /// </summary>
 /// <param name="expectedIndel">Expected indel key string.</param>
 /// <param name="expectedEvidence">Evidence holding the expected values.</param>
 /// <param name="actualIndel">Indel key string produced by the code under test.</param>
 /// <param name="actualEvidence">Evidence produced by the code under test.</param>
 private void VerifyIndelEvidence(
     string expectedIndel,
     IndelEvidence expectedEvidence,
     string actualIndel,
     IndelEvidence actualEvidence)
 {
     // Key first, so a wrong indel is reported before any count mismatch.
     Assert.Equal(expectedIndel, actualIndel);

     // Strand support.
     Assert.Equal(expectedEvidence.Forward, actualEvidence.Forward);
     Assert.Equal(expectedEvidence.Reverse, actualEvidence.Reverse);

     // Anchors.
     Assert.Equal(expectedEvidence.LeftAnchor, actualEvidence.LeftAnchor);
     Assert.Equal(expectedEvidence.RightAnchor, actualEvidence.RightAnchor);

     // Reputable support.
     Assert.Equal(expectedEvidence.ReputableSupport, actualEvidence.ReputableSupport);
 }
Exemple #3
0
        /// <summary>
        /// Asserts that two <c>IndelEvidence</c> instances agree on each of the eleven fields
        /// compared below, checked one at a time so a failure names the first mismatching field.
        /// </summary>
        /// <param name="expected">Evidence holding the expected values.</param>
        /// <param name="actual">Evidence produced by the code under test.</param>
        private void ValidateEvidenceMatches(IndelEvidence expected, IndelEvidence actual)
        {
            // Strand / stitching support.
            Assert.Equal(expected.Stitched, actual.Stitched);
            Assert.Equal(expected.Forward, actual.Forward);
            Assert.Equal(expected.Reverse, actual.Reverse);

            // Totals.
            Assert.Equal(expected.Observations, actual.Observations);
            Assert.Equal(expected.Quality, actual.Quality);
            Assert.Equal(expected.Mess, actual.Mess);

            // Anchors.
            Assert.Equal(expected.LeftAnchor, actual.LeftAnchor);
            Assert.Equal(expected.RightAnchor, actual.RightAnchor);

            // Remaining counters.
            Assert.Equal(expected.IsRepeat, actual.IsRepeat);
            Assert.Equal(expected.ReputableSupport, actual.ReputableSupport);
            Assert.Equal(expected.IsSplit, actual.IsSplit);
        }
Exemple #4
0
        // Checks that IndelEvidenceHelper.FindIndelsAndRecordEvidence accumulates evidence per indel key
        // in the supplied lookup across repeated calls: same indel from the same read, same indel from a
        // different read, a different indel, a multi-indel read, and a multi-indel read whose indels are
        // far apart (which is expected to register the individual indels as well as the pair).
        public void FindIndelsAndRecordEvidence()
        {
            var readPair     = TestHelpers.GetPair("5M1D5M", "5M2I4M", nm2: 3);
            var readPair2    = TestHelpers.GetPair("3M1D8M", "5M1D5M", nm2: 4);
            var targetFinder = new IndelTargetFinder();
            var lookup       = new Dictionary <string, IndelEvidence>();

            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10);

            var expectedDel = "chr1:104 NN>N";
            var expectedIns = "chr1:104 N>NTT";

            // Collection sizes are integers: assert "exactly one entry" directly rather than
            // comparing Count against a floating-point literal.
            Assert.Single(lookup);
            Assert.Equal(expectedDel, lookup.Keys.First());

            //obs,left,right,mess,quals,fwd,reverse,stitched,reput
            var evidence = new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 5,
                Mess             = 0,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            };

            ValidateEvidenceMatches(evidence, lookup[expectedDel]);

            // Build evidence for same indel, let's call it stitched this time
            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10, true);
            Assert.Single(lookup);
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(new IndelEvidence()
            {
                Observations     = 2,
                LeftAnchor       = 10,
                RightAnchor      = 10,
                Mess             = 0,
                Quality          = 60,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 1,
                ReputableSupport = 2,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedDel]);

            // Build evidence for same indel from a different read, this one's not reputable and is reverse
            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair2.Read2, targetFinder, lookup, false, "chr1", 10);
            Assert.Single(lookup);
            Assert.Contains(expectedDel, lookup.Keys);

            // Expected state of the deletion after the three reads above. Mess grows by 3 here:
            // the read has nm = 4 and a 1-base deletion (4 - 1 = 3).
            // The same expectation is re-checked below after unrelated indels are recorded.
            var expectedDelAfterThreeReads = new IndelEvidence()
            {
                Observations     = 3,
                LeftAnchor       = 15,
                RightAnchor      = 15,
                Mess             = 3,
                Quality          = 90,
                Forward          = 1,
                Reverse          = 1,
                Stitched         = 1,
                ReputableSupport = 2,
                IsRepeat         = 0,
                IsSplit          = 0
            };

            ValidateEvidenceMatches(expectedDelAfterThreeReads, lookup[expectedDel]);

            // Different indel, reverse only
            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read2, targetFinder, lookup, true, "chr1", 10);
            Assert.Equal(2, lookup.Count);
            // Original del shouldn't have changed
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(expectedDelAfterThreeReads, lookup[expectedDel]);

            Assert.Contains(expectedIns, lookup.Keys);
            // Mess is nm minus the insertion length: nm = 3, 2-base insertion (3 - 2 = 1)
            ValidateEvidenceMatches(
                new IndelEvidence
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 4,
                Mess             = 1,
                Quality          = 30,
                Forward          = 0,
                Reverse          = 1,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }
                , lookup[expectedIns]);


            // Multi-indel
            var readPairMulti = TestHelpers.GetPair("5M1D1M1D4M", "5M1D1M1D4M", nm: 2, nm2: 2);

            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMulti.Read1, targetFinder, lookup, true, "chr1", 10);
            Assert.Equal(3, lookup.Count);
            // Original del shouldn't have changed
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(expectedDelAfterThreeReads, lookup[expectedDel]);
            var expectedMulti = "chr1:104 NN>N|chr1:106 NN>N";

            Assert.Contains(expectedMulti, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 4,
                Mess             = 0,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedMulti]);

            // Multi that are far apart - allow to track individually too.
            var readPairMultiFar = TestHelpers.GetPair("5M1D26M1D4M", "5M1D26M1D4M", nm: 2, nm2: 2);

            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMultiFar.Read1, targetFinder, lookup, true, "chr1", 10);
            Assert.Equal(5, lookup.Count);
            // Because the far-apart indels are also tracked individually, the original del
            // gains one more observation this time.
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 4,
                LeftAnchor       = 20,
                RightAnchor      = 41,
                Mess             = 4,
                Quality          = 120,
                Forward          = 2,
                Reverse          = 1,
                Stitched         = 1,
                ReputableSupport = 3,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedDel]);
            var expectedMultiFar = "chr1:104 NN>N|chr1:131 NN>N";

            Assert.Contains(expectedMultiFar, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 4,
                Mess             = 0,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedMultiFar]);
            var expectedSecondSingleFromMulti = "chr1:131 NN>N";

            Assert.Contains(expectedSecondSingleFromMulti, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 26,
                RightAnchor      = 4,
                Mess             = 1,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedSecondSingleFromMulti]);
        }
        // Checks that IndelEvidenceCollector.CollectIndelEvidence returns one result per input pair and
        // accumulates evidence for the same indel in the shared lookup across successive calls, with
        // reputable support only increasing for the pair flagged as reputable.
        public void CollectIndelEvidence()
        {
            var targetFinder = new IndelTargetFinder();
            var pairs        = new List <PairResult>();

            // Reputable read
            var pair1 = TestHelpers.GetPairResult(1, 0,
                                                  classification: PairClassification.IndelUnstitchable,
                                                  hasIndels: true, isReputableIndelContaining: true);
            // Less reputable
            var pair2 = TestHelpers.GetPairResult(1, 0,
                                                  classification: PairClassification.IndelUnstitchable, hasIndels: true);

            pairs.Add(pair1);

            ConcurrentDictionary <string, IndelEvidence> indelLookup = new ConcurrentDictionary <string, IndelEvidence>();
            var results = IndelEvidenceCollector.CollectIndelEvidence(targetFinder, "chr1", indelLookup, pairs.ToArray());

            // This is just a pass-through
            Assert.Equal(pairs.Count, results.Length);

            // Check indel evidence. Assert.Single both verifies there is exactly one entry and
            // returns it, replacing the separate Count check + First() lookup.
            var indel            = Assert.Single(indelLookup);
            var expectedEvidence = new IndelEvidence()
            {
                Forward          = 1,
                Reverse          = 1,
                LeftAnchor       = 10,
                RightAnchor      = 10,
                ReputableSupport = 2
            };

            VerifyIndelEvidence("chr1:6 N>NT", expectedEvidence, indel.Key, indel.Value);

            // Add on more indel evidence for the same one
            results = IndelEvidenceCollector.CollectIndelEvidence(targetFinder, "chr1", indelLookup, pairs.ToArray());
            Assert.Equal(pairs.Count, results.Length);
            indel            = Assert.Single(indelLookup);
            expectedEvidence = new IndelEvidence()
            {
                Forward          = 2,
                Reverse          = 2,
                LeftAnchor       = 20,
                RightAnchor      = 20,
                ReputableSupport = 4
            };
            VerifyIndelEvidence("chr1:6 N>NT", expectedEvidence, indel.Key, indel.Value);

            // Add on some less reputable evidence: counts and anchors grow, reputable support does not
            pairs.Clear();
            pairs.Add(pair2);
            results = IndelEvidenceCollector.CollectIndelEvidence(targetFinder, "chr1", indelLookup, pairs.ToArray());
            Assert.Equal(pairs.Count, results.Length);
            indel            = Assert.Single(indelLookup);
            expectedEvidence = new IndelEvidence()
            {
                Forward          = 3,
                Reverse          = 3,
                LeftAnchor       = 30,
                RightAnchor      = 30,
                ReputableSupport = 4
            };
            VerifyIndelEvidence("chr1:6 N>NT", expectedEvidence, indel.Key, indel.Value);
        }
Exemple #6
0
        /// <summary>
        /// Decides whether an indel candidate should be discarded, recording the reason on
        /// <paramref name="evidence"/> and bumping the matching counter on
        /// <paramref name="statusCounter"/>.
        /// </summary>
        /// <remarks>
        /// Checks run in this order:
        /// 1. Basic thresholds (observation count, average anchors, average mess). A failing
        ///    candidate is removed unless <paramref name="isStrong"/> is set, in which case it is
        ///    marked as rescued and evaluation continues with the remaining checks.
        /// 2. Poor single observation.
        /// 3. Poor candidate at or below <paramref name="edgeThreshold"/> observations.
        /// A rescued candidate can therefore still be removed by check 2 or 3.
        /// </remarks>
        /// <returns><c>true</c> when the candidate should be removed; otherwise <c>false</c>.</returns>
        private bool ShouldRemoveVariant(int observationCount, float avgAnchorLeft, float avgAnchorRight, bool isStrong,
                                         IndelStatusCounter statusCounter, float avgQuals, float avgMess, int anchorLeft, int anchorRight, double edgeThreshold, IndelEvidence evidence)
        {
            var failsBasicThresholds = observationCount < _foundThreshold
                                       || avgAnchorLeft < _anchorThreshold
                                       || avgAnchorRight < _anchorThreshold
                                       || avgMess > _maxMess;

            if (failsBasicThresholds)
            {
                if (!isStrong)
                {
                    evidence.Outcome = Outcome.BelowThreshold;
                    statusCounter.BelowThreshold++;
                    return true;
                }

                // Strong candidates survive the basic thresholds but are still subject to the checks below.
                evidence.Outcome = Outcome.Rescued;
                statusCounter.Rescued++;
            }

            var shortestAnchor = Math.Min(anchorLeft, anchorRight);
            var looksJunky     = shortestAnchor < 5 || avgMess > 1 || avgQuals < 30;

            if (observationCount == 1 && looksJunky)
            {
                // Even if single-observation variants are allowed to be realigned against, avoid the really junky ones.
                evidence.Outcome = Outcome.PoorSingle;
                statusCounter.PoorSingle++;
                return true;
            }

            var isAtOrBelowEdge = observationCount <= edgeThreshold;
            var isLowQuality    = avgMess > 2 || avgQuals < 25;

            if (isAtOrBelowEdge && isLowQuality)
            {
                evidence.Outcome = Outcome.PoorEdge;
                statusCounter.PoorEdge++;
                return true;
            }

            return false;
        }
Exemple #7
0
        /// <summary>
        /// Turns one evidence entry (a single indel or a pair of indels seen together) into
        /// <c>PreIndel</c> objects annotated with the evidence, then applies the removal filters.
        /// </summary>
        /// <param name="indelMetrics">Accumulated evidence for the entry; its <c>Outcome</c> may be set here or by <c>ShouldRemoveVariant</c>.</param>
        /// <param name="keyString">Entry key, used for logging and passed to <c>IsStrong</c>.</param>
        /// <param name="statusCounter">Counter updated with the outcome (<c>Kept</c> here; other outcomes inside <c>ShouldRemoveVariant</c>).</param>
        /// <param name="edgeThreshold">Forwarded to <c>ShouldRemoveVariant</c>.</param>
        /// <param name="allowRescue">When false, <c>IsStrong</c> is not consulted and the entry is never treated as strong.</param>
        /// <param name="indels">Indels parsed from the entry. One or two are converted; more than two are logged and ignored.</param>
        /// <returns>
        /// The list of converted indels when the entry is kept (empty when more than two indels
        /// were supplied), or <c>null</c> when the entry is filtered out.
        /// </returns>
        private List <PreIndel> ExtractIndelsFromEntry(IndelEvidence indelMetrics, string keyString,
                                                       IndelStatusCounter statusCounter,
                                                       double edgeThreshold, bool allowRescue, List <PreIndel> indels)
        {
            // Raw totals from the evidence.
            var observationCount = indelMetrics.Observations;
            var totalLeftAnchor  = indelMetrics.LeftAnchor;
            var totalRightAnchor = indelMetrics.RightAnchor;
            var totalMess        = indelMetrics.Mess;
            var totalQuality     = indelMetrics.Quality;
            var repeatCount      = indelMetrics.IsRepeat;
            var splitCount       = indelMetrics.IsSplit;

            // Per-observation fractions and averages (floating-point division).
            var observations      = (float)observationCount;
            var forwardFraction   = indelMetrics.Forward / observations;
            var reverseFraction   = indelMetrics.Reverse / observations;
            var stitchedFraction  = indelMetrics.Stitched / observations;
            var reputableFraction = indelMetrics.ReputableSupport / observations;
            var meanLeftAnchor    = totalLeftAnchor / observations;
            var meanRightAnchor   = totalRightAnchor / observations;
            var meanQuality       = totalQuality / observations;
            var meanMess          = totalMess / observations;

            // TODO clean this up, no more magic
            // Strength is only evaluated when rescue is allowed.
            var isStrong = allowRescue
                           && IsStrong(meanQuality, reputableFraction, meanLeftAnchor, meanMess, meanRightAnchor,
                                       reverseFraction, observationCount, forwardFraction, keyString, stitchedFraction);

            var converted = new List <PreIndel>();

            if (indels.Count <= 1)
            {
                // Single indel.
                var single = GetIndelFromEntry(indels[0], totalLeftAnchor, totalRightAnchor, observationCount, totalMess, forwardFraction,
                                               reverseFraction, reputableFraction, meanQuality, stitchedFraction, splitCount, repeatCount);

                converted.Add(single);
            }
            else if (indels.Count == 2)
            {
                // Pair of indels: both get the same evidence and are cross-linked.
                var first = GetIndelFromEntry(indels[0], totalLeftAnchor, totalRightAnchor, observationCount, totalMess, forwardFraction,
                                              reverseFraction, reputableFraction, meanQuality, stitchedFraction, splitCount, repeatCount);
                var second = GetIndelFromEntry(indels[1], totalLeftAnchor, totalRightAnchor, observationCount, totalMess, forwardFraction,
                                               reverseFraction, reputableFraction, meanQuality, stitchedFraction, splitCount, repeatCount);

                first.InMulti  = true;
                second.InMulti = true;

                first.OtherIndel  = Helper.CandidateToString(second);
                second.OtherIndel = Helper.CandidateToString(first);

                converted.Add(first);
                converted.Add(second);
            }
            else
            {
                Logger.WriteToLog(
                    $"Can't support more than 2 indels in one read: ignoring multi-indel {keyString} (seen {observationCount} times)");
            }

            // A lone length-1 indel with very little support is dropped before the general filters.
            var isLoneLengthOne = indels.Count == 1 && converted[0].Length == 1;
            var isBarelySeen    = observationCount < _foundThreshold * 0.8 || observationCount <= 2;

            if (isLoneLengthOne && isBarelySeen)
            {
                indelMetrics.Outcome = Outcome.SuperWeakSmall;
                return null;
            }

            var shouldRemove = ShouldRemoveVariant(observationCount, meanLeftAnchor, meanRightAnchor, isStrong, statusCounter,
                                                   meanQuality, meanMess, totalLeftAnchor, totalRightAnchor, edgeThreshold, indelMetrics);

            if (shouldRemove)
            {
                return null;
            }

            statusCounter.Kept++;

            return converted;
        }
Exemple #8
0
        // Checks how many indels BasicIndelFilterer.GetRealignablePreIndels returns for a fixed set of
        // evidence under different constructor thresholds (first argument 0 or 5, second 0 or 20), with
        // and without rescue, with a strict found threshold, and for a multi-indel key.
        // The dictionary is rebuilt before every call, as in the original test.
        public void GetRealignablePreIndels()
        {
            var filterer = new BasicIndelFilterer(0, 0, false);

            // Good support, good anchors, good direction balance, low mess
            var goodEvidence = new IndelEvidence()
            {
                Observations     = 10,
                LeftAnchor       = 500,
                RightAnchor      = 500,
                Mess             = 3,
                Quality          = 300,
                Forward          = 3,
                Reverse          = 3,
                Stitched         = 4,
                ReputableSupport = 5,
                IsRepeat         = 0,
                IsSplit          = 0
            };
            // Bad left anchor
            var badLeftAnchor = new IndelEvidence()
            {
                Observations     = 10,
                LeftAnchor       = 100,
                RightAnchor      = 900,
                Mess             = 3,
                Quality          = 300,
                Forward          = 3,
                Reverse          = 3,
                Stitched         = 4,
                ReputableSupport = 5,
                IsRepeat         = 0,
                IsSplit          = 0
            };
            // Bad right anchor
            var badRightAnchor = new IndelEvidence()
            {
                Observations     = 10,
                LeftAnchor       = 900,
                RightAnchor      = 100,
                Mess             = 3,
                Quality          = 300,
                Forward          = 3,
                Reverse          = 3,
                Stitched         = 4,
                ReputableSupport = 5,
                IsRepeat         = 0,
                IsSplit          = 0
            };

            // Support too low
            var supportTooLow = new IndelEvidence()
            {
                Observations     = 4,
                LeftAnchor       = 200,
                RightAnchor      = 200,
                Mess             = 0,
                Quality          = 240,
                Forward          = 1,
                Reverse          = 1,
                Stitched         = 2,
                ReputableSupport = 4,
                IsRepeat         = 0,
                IsSplit          = 0
            };
            // Support too low, and some mess on top
            var supportTooLowAndIsMess = new IndelEvidence()
            {
                Observations     = 4,
                LeftAnchor       = 200,
                RightAnchor      = 200,
                Mess             = 3,
                Quality          = 240,
                Forward          = 1,
                Reverse          = 1,
                Stitched         = 2,
                ReputableSupport = 4,
                IsRepeat         = 0,
                IsSplit          = 0
            };

            var indelsDict = new Dictionary <string, IndelEvidence>()
            {
                { "chr1:123 A>ATG", goodEvidence },
                { "chr1:123 A>ATGC", badLeftAnchor },
                { "chr2:123 ATG>A", badRightAnchor },
                { "chr3:123 A>ATG", supportTooLow },
                { "chr4:123 A>ATG", supportTooLowAndIsMess },
            };

            // No thresholds: everything is kept
            var realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
            var indels            = realignableIndels.SelectMany(x => x.Value);

            Assert.Equal(5, indels.Count());

            // Filter by support only
            indelsDict = new Dictionary <string, IndelEvidence>()
            {
                { "chr1:123 A>ATG", goodEvidence },
                { "chr1:123 A>ATGC", badLeftAnchor },
                { "chr2:123 ATG>A", badRightAnchor },
                { "chr3:123 A>ATG", supportTooLow },
                { "chr4:123 A>ATG", supportTooLowAndIsMess },
            };
            filterer          = new BasicIndelFilterer(5, 0, false);
            realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
            indels            = realignableIndels.SelectMany(x => x.Value);
            Assert.Equal(3, indels.Count());

            // Filter by anchor only
            // Note, by default we throw out anything with 0 observations (what does that even mean?)
            // Should keep chr1:123 A>ATG, chr3:123 A>ATG and chr4:123 A>ATG
            indelsDict = new Dictionary <string, IndelEvidence>()
            {
                { "chr1:123 A>ATG", goodEvidence },
                { "chr1:123 A>ATGC", badLeftAnchor },
                { "chr2:123 ATG>A", badRightAnchor },
                { "chr3:123 A>ATG", supportTooLow },
                { "chr4:123 A>ATG", supportTooLowAndIsMess },
            };
            filterer          = new BasicIndelFilterer(0, 20, false);
            realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
            indels            = realignableIndels.SelectMany(x => x.Value);
            Assert.Equal(3, indels.Count());

            // Filter by anchor and support
            indelsDict = new Dictionary <string, IndelEvidence>()
            {
                { "chr1:123 A>ATG", goodEvidence },
                { "chr1:123 A>ATGC", badLeftAnchor },
                { "chr2:123 ATG>A", badRightAnchor },
                { "chr3:123 A>ATG", supportTooLow },
                { "chr4:123 A>ATG", supportTooLowAndIsMess },
            };

            filterer          = new BasicIndelFilterer(5, 20, false);
            realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
            indels            = realignableIndels.SelectMany(x => x.Value);
            Assert.Single(indels);

            // Rescue a good indel that doesn't meet the requirements: same thresholds as above,
            // but with rescue allowed one additional indel is expected to be kept (2 instead of 1).
            indelsDict = new Dictionary <string, IndelEvidence>()
            {
                { "chr1:123 A>ATG", goodEvidence },
                { "chr1:123 A>ATGC", badLeftAnchor },
                { "chr2:123 ATG>A", badRightAnchor },
                { "chr3:123 A>ATG", supportTooLow },
                { "chr4:123 A>ATG", supportTooLowAndIsMess },
            };

            realignableIndels = filterer.GetRealignablePreIndels(indelsDict, true);
            indels            = realignableIndels.SelectMany(x => x.Value);
            Assert.Equal(2, indels.Count());

            // Don't rescue stuff that falls below required minimum
            indelsDict = new Dictionary <string, IndelEvidence>()
            {
                { "chr1:123 A>ATG", goodEvidence },
                { "chr1:123 A>ATGC", badLeftAnchor },
                { "chr2:123 ATG>A", badRightAnchor },
                { "chr3:123 A>ATG", supportTooLow },
                { "chr4:123 A>ATG", supportTooLowAndIsMess },
            };

            filterer          = new BasicIndelFilterer(5, 20, false, strictFoundThreshold: 5);
            realignableIndels = filterer.GetRealignablePreIndels(indelsDict, true);
            indels            = realignableIndels.SelectMany(x => x.Value);
            // Counts are integers: assert a single element directly instead of comparing to 1.0
            Assert.Single(indels);

            // Multis: one multi-indel key yields both of its indels
            indelsDict = new Dictionary <string, IndelEvidence>()
            {
                { "chr1:123 A>ATG|chr1:140 C>CTG", goodEvidence },
            };

            filterer          = new BasicIndelFilterer(5, 20, false, strictFoundThreshold: 5);
            realignableIndels = filterer.GetRealignablePreIndels(indelsDict, true);
            indels            = realignableIndels.SelectMany(x => x.Value);
            Assert.Equal(2, indels.Count());
        }