/// <summary>
/// Rebuilds a <see cref="PreIndel"/> from a hashable indel, merges all of the evidence
/// recorded for it into one <see cref="IndelEvidence"/>, and re-runs the entry filter on the result.
/// Anything the filter returns is appended to <paramref name="indelsToAdd"/>.
/// </summary>
/// <param name="allowRescue">Forwarded to the entry filter.</param>
/// <param name="indelToRecalculate">The hashable indel together with the evidence records collected for it.</param>
/// <param name="statusCounter">Forwarded to the entry filter, which updates its tallies.</param>
/// <param name="edgeThreshold">Forwarded to the entry filter.</param>
/// <param name="indelsToAdd">Output list; receives the indels returned by the filter, if any.</param>
private void RecalculateIndelAndAddIfNeeded(bool allowRescue, KeyValuePair<HashableIndel, List<IndelEvidence>> indelToRecalculate, IndelStatusCounter statusCounter, double edgeThreshold, List<PreIndel> indelsToAdd)
{
    var source = indelToRecalculate.Key;

    var candidate = new CandidateAllele(
        source.Chromosome,
        source.ReferencePosition,
        source.ReferenceAllele,
        source.AlternateAllele,
        source.Type);

    var rebuilt = new PreIndel(candidate)
    {
        InMulti = source.InMulti,
        OtherIndel = source.OtherIndel
    };

    // Fold every evidence record for this indel into one combined tally.
    var combined = new IndelEvidence();
    foreach (var record in indelToRecalculate.Value)
    {
        combined.AddIndelEvidence(record);
    }

    // The key string is "<indel>|<other indel>"; the filter is always handed a single-element list here.
    var keyString = rebuilt.ToString() + "|" + rebuilt.OtherIndel;
    var survivors = ExtractIndelsFromEntry(
        combined,
        keyString,
        statusCounter,
        edgeThreshold,
        allowRescue,
        new List<PreIndel>() { rebuilt });

    if (survivors == null)
    {
        return;
    }

    indelsToAdd.AddRange(survivors);
}
/// <summary>
/// Asserts that an indel key matches the expected key and that the strand counts,
/// anchor totals and reputable-support count of the two evidence records agree.
/// Other evidence fields are not compared here.
/// </summary>
private void VerifyIndelEvidence(string expectedIndel, IndelEvidence expectedEvidence, string actualIndel, IndelEvidence actualEvidence)
{
    // Key first, so a wrong-indel failure is reported before any count mismatch.
    Assert.Equal(expectedIndel, actualIndel);

    // Strand support
    Assert.Equal(expectedEvidence.Forward, actualEvidence.Forward);
    Assert.Equal(expectedEvidence.Reverse, actualEvidence.Reverse);

    // Anchors
    Assert.Equal(expectedEvidence.LeftAnchor, actualEvidence.LeftAnchor);
    Assert.Equal(expectedEvidence.RightAnchor, actualEvidence.RightAnchor);

    // Reputable support
    Assert.Equal(expectedEvidence.ReputableSupport, actualEvidence.ReputableSupport);
}
/// <summary>
/// Asserts that two evidence records agree on each of the eleven fields compared below.
/// The comparison order is fixed, so the first mismatching field is the one reported.
/// </summary>
private void ValidateEvidenceMatches(IndelEvidence expected, IndelEvidence actual)
{
    // Strand / stitching breakdown
    Assert.Equal(expected.Stitched, actual.Stitched);
    Assert.Equal(expected.Forward, actual.Forward);
    Assert.Equal(expected.Reverse, actual.Reverse);

    // Totals
    Assert.Equal(expected.Observations, actual.Observations);
    Assert.Equal(expected.Quality, actual.Quality);
    Assert.Equal(expected.Mess, actual.Mess);

    // Anchors
    Assert.Equal(expected.LeftAnchor, actual.LeftAnchor);
    Assert.Equal(expected.RightAnchor, actual.RightAnchor);

    // Flags / support tallies
    Assert.Equal(expected.IsRepeat, actual.IsRepeat);
    Assert.Equal(expected.ReputableSupport, actual.ReputableSupport);
    Assert.Equal(expected.IsSplit, actual.IsSplit);
}
/// <summary>
/// Feeds a series of reads through IndelEvidenceHelper.FindIndelsAndRecordEvidence against one shared
/// lookup and checks, after each read, which indel keys exist and what evidence has accumulated
/// for them: a single deletion, the same deletion seen again (stitched, then from a non-reputable
/// reverse read), a different insertion, a close-together multi-indel, and a far-apart multi-indel.
/// </summary>
public void FindIndelsAndRecordEvidence()
{
    var readPair = TestHelpers.GetPair("5M1D5M", "5M2I4M", nm2: 3);
    var readPair2 = TestHelpers.GetPair("3M1D8M", "5M1D5M", nm2: 4);
    var targetFinder = new IndelTargetFinder();
    var lookup = new Dictionary<string, IndelEvidence>();

    var expectedDel = "chr1:104 NN>N";
    var expectedIns = "chr1:104 N>NTT";

    // First observation of the deletion
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10);

    // Counts are ints: compare against int literals rather than doubles.
    Assert.Equal(1, lookup.Count);
    Assert.Equal(expectedDel, lookup.Keys.First());
    var evidence = new IndelEvidence()
    {
        Observations = 1, LeftAnchor = 5, RightAnchor = 5, Mess = 0, Quality = 30,
        Forward = 1, Reverse = 0, Stitched = 0, ReputableSupport = 1, IsRepeat = 0, IsSplit = 0
    };
    ValidateEvidenceMatches(evidence, lookup[expectedDel]);

    // Build evidence for same indel, let's call it stitched this time
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10, true);

    Assert.Equal(1, lookup.Count);
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 2, LeftAnchor = 10, RightAnchor = 10, Mess = 0, Quality = 60,
            Forward = 1, Reverse = 0, Stitched = 1, ReputableSupport = 2, IsRepeat = 0, IsSplit = 0
        },
        lookup[expectedDel]);

    // Build evidence for same indel from a different read, this one's not reputable and is reverse
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair2.Read2, targetFinder, lookup, false, "chr1", 10);

    Assert.Equal(1, lookup.Count);
    Assert.Contains(expectedDel, lookup.Keys);

    // Mess grows by 3 here: presumably the read's nm (4) minus the deletion length (1).
    // This is also the state the deletion must keep through the next two steps.
    var delAfterThreeReads = new IndelEvidence()
    {
        Observations = 3, LeftAnchor = 15, RightAnchor = 15, Mess = 3, Quality = 90,
        Forward = 1, Reverse = 1, Stitched = 1, ReputableSupport = 2, IsRepeat = 0, IsSplit = 0
    };
    ValidateEvidenceMatches(delAfterThreeReads, lookup[expectedDel]);

    // Different indel, reverse only
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read2, targetFinder, lookup, true, "chr1", 10);

    Assert.Equal(2, lookup.Count);

    // Original del shouldn't have changed
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(delAfterThreeReads, lookup[expectedDel]);

    Assert.Contains(expectedIns, lookup.Keys);
    // Mess is 1 here: presumably the read's nm (3) minus the insertion length (2).
    ValidateEvidenceMatches(
        new IndelEvidence
        {
            Observations = 1, LeftAnchor = 5, RightAnchor = 4, Mess = 1, Quality = 30,
            Forward = 0, Reverse = 1, Stitched = 0, ReputableSupport = 1, IsRepeat = 0, IsSplit = 0
        },
        lookup[expectedIns]);

    // Multi-indel
    var readPairMulti = TestHelpers.GetPair("5M1D1M1D4M", "5M1D1M1D4M", nm: 2, nm2: 2);
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMulti.Read1, targetFinder, lookup, true, "chr1", 10);

    Assert.Equal(3, lookup.Count);

    // Original del shouldn't have changed
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(delAfterThreeReads, lookup[expectedDel]);

    var expectedMulti = "chr1:104 NN>N|chr1:106 NN>N";
    Assert.Contains(expectedMulti, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 1, LeftAnchor = 5, RightAnchor = 4, Mess = 0, Quality = 30,
            Forward = 1, Reverse = 0, Stitched = 0, ReputableSupport = 1, IsRepeat = 0, IsSplit = 0
        },
        lookup[expectedMulti]);

    // Multi that are far apart - allow to track individually too.
    var readPairMultiFar = TestHelpers.GetPair("5M1D26M1D4M", "5M1D26M1D4M", nm: 2, nm2: 2);
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMultiFar.Read1, targetFinder, lookup, true, "chr1", 10);

    // Two new keys are expected: the far-apart multi and its second deletion on its own.
    Assert.Equal(5, lookup.Count);

    // Unlike the close multi, this read also counts towards the original deletion individually,
    // so its evidence gains one more observation.
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 4, LeftAnchor = 20, RightAnchor = 41, Mess = 4, Quality = 120,
            Forward = 2, Reverse = 1, Stitched = 1, ReputableSupport = 3, IsRepeat = 0, IsSplit = 0
        },
        lookup[expectedDel]);

    var expectedMultiFar = "chr1:104 NN>N|chr1:131 NN>N";
    Assert.Contains(expectedMultiFar, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 1, LeftAnchor = 5, RightAnchor = 4, Mess = 0, Quality = 30,
            Forward = 1, Reverse = 0, Stitched = 0, ReputableSupport = 1, IsRepeat = 0, IsSplit = 0
        },
        lookup[expectedMultiFar]);

    var expectedSecondSingleFromMulti = "chr1:131 NN>N";
    Assert.Contains(expectedSecondSingleFromMulti, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 1, LeftAnchor = 26, RightAnchor = 4, Mess = 1, Quality = 30,
            Forward = 1, Reverse = 0, Stitched = 0, ReputableSupport = 1, IsRepeat = 0, IsSplit = 0
        },
        lookup[expectedSecondSingleFromMulti]);
}
/// <summary>
/// Runs IndelEvidenceCollector.CollectIndelEvidence three times against one shared lookup
/// (twice with a reputable pair, once with a less reputable pair) and checks that the pairs are
/// passed through unchanged in number while evidence for the single indel keeps accumulating.
/// </summary>
public void CollectIndelEvidence()
{
    const string expectedKey = "chr1:6 N>NT";

    var targetFinder = new IndelTargetFinder();
    var batch = new List<PairResult>();

    // Reputable read
    var reputablePair = TestHelpers.GetPairResult(1, 0, classification: PairClassification.IndelUnstitchable, hasIndels: true, isReputableIndelContaining: true);
    // Less reputable
    var lessReputablePair = TestHelpers.GetPairResult(1, 0, classification: PairClassification.IndelUnstitchable, hasIndels: true);

    batch.Add(reputablePair);
    var evidenceByIndel = new ConcurrentDictionary<string, IndelEvidence>();

    // --- Round 1: first sight of the indel ---
    var passedThrough = IndelEvidenceCollector.CollectIndelEvidence(targetFinder, "chr1", evidenceByIndel, batch.ToArray());

    // This is just a pass-through
    Assert.Equal(batch.Count, passedThrough.Length);

    // Check indel evidence
    Assert.Equal(1, evidenceByIndel.Count);
    var entry = evidenceByIndel.First();
    var expected = new IndelEvidence()
    {
        Forward = 1,
        Reverse = 1,
        LeftAnchor = 10,
        RightAnchor = 10,
        ReputableSupport = 2
    };
    VerifyIndelEvidence(expectedKey, expected, entry.Key, entry.Value);

    // --- Round 2: add on more indel evidence for the same one ---
    passedThrough = IndelEvidenceCollector.CollectIndelEvidence(targetFinder, "chr1", evidenceByIndel, batch.ToArray());

    Assert.Equal(batch.Count, passedThrough.Length);
    Assert.Equal(1, evidenceByIndel.Count);
    entry = evidenceByIndel.First();
    expected = new IndelEvidence()
    {
        Forward = 2,
        Reverse = 2,
        LeftAnchor = 20,
        RightAnchor = 20,
        ReputableSupport = 4
    };
    VerifyIndelEvidence(expectedKey, expected, entry.Key, entry.Value);

    // --- Round 3: add on some less reputable evidence ---
    batch.Clear();
    batch.Add(lessReputablePair);
    passedThrough = IndelEvidenceCollector.CollectIndelEvidence(targetFinder, "chr1", evidenceByIndel, batch.ToArray());

    Assert.Equal(batch.Count, passedThrough.Length);
    Assert.Equal(1, evidenceByIndel.Count);
    entry = evidenceByIndel.First();
    // Strand and anchor totals keep growing, but reputable support stays at 4.
    expected = new IndelEvidence()
    {
        Forward = 3,
        Reverse = 3,
        LeftAnchor = 30,
        RightAnchor = 30,
        ReputableSupport = 4
    };
    VerifyIndelEvidence(expectedKey, expected, entry.Key, entry.Value);
}
/// <summary>
/// Decides whether an indel should be dropped, recording the reason on
/// <paramref name="evidence"/> and in <paramref name="statusCounter"/>.
/// </summary>
/// <remarks>
/// Three checks run in order:
/// 1. Basic thresholds (found count, both average anchors, max mess). A failing variant is removed
///    as BelowThreshold unless <paramref name="isStrong"/> is set, in which case it is marked Rescued
///    and still goes on to the remaining checks.
/// 2. Single-observation variants with a short anchor, mess above 1, or average quality below 30
///    are removed as PoorSingle.
/// 3. Variants at or below <paramref name="edgeThreshold"/> observations with mess above 2 or
///    average quality below 25 are removed as PoorEdge.
/// </remarks>
/// <returns>true when the variant should be removed; otherwise false.</returns>
private bool ShouldRemoveVariant(int observationCount, float avgAnchorLeft, float avgAnchorRight, bool isStrong, IndelStatusCounter statusCounter, float avgQuals, float avgMess, int anchorLeft, int anchorRight, double edgeThreshold, IndelEvidence evidence)
{
    bool missesBasicThresholds = observationCount < _foundThreshold
                                 || avgAnchorLeft < _anchorThreshold
                                 || avgAnchorRight < _anchorThreshold
                                 || avgMess > _maxMess;

    if (missesBasicThresholds)
    {
        if (!isStrong)
        {
            evidence.Outcome = Outcome.BelowThreshold;
            statusCounter.BelowThreshold++;
            return true;
        }

        // Strong variants are rescued here but can still be rejected by the checks below.
        evidence.Outcome = Outcome.Rescued;
        statusCounter.Rescued++;
    }

    // Even if we want to allow single-observation variants to be realigned against,
    // maybe let's avoid the really junky ones.
    int shorterAnchor = Math.Min(anchorLeft, anchorRight);
    bool isJunkySingle = observationCount == 1
                         && (shorterAnchor < 5 || avgMess > 1 || avgQuals < 30);
    if (isJunkySingle)
    {
        evidence.Outcome = Outcome.PoorSingle;
        statusCounter.PoorSingle++;
        return true;
    }

    bool isJunkyEdge = observationCount <= edgeThreshold
                       && (avgMess > 2 || avgQuals < 25);
    if (isJunkyEdge)
    {
        evidence.Outcome = Outcome.PoorEdge;
        statusCounter.PoorEdge++;
        return true;
    }

    return false;
}
/// <summary>
/// Turns one evidence entry into the list of indels worth keeping, or null when the entry is filtered out.
/// </summary>
/// <remarks>
/// Per-observation averages and support fractions are derived from <paramref name="indelMetrics"/>,
/// the indel(s) are annotated via GetIndelFromEntry, and the entry is then rejected if it is a
/// lone length-1 indel with too few observations (SuperWeakSmall) or if ShouldRemoveVariant says so.
/// Entries with more than two indels are logged and contribute no indels; they still pass through
/// the removal check, so a surviving one yields an empty list and increments Kept.
/// </remarks>
/// <param name="indelMetrics">Accumulated evidence for the entry; its Outcome may be set here.</param>
/// <param name="keyString">Entry key, used for logging and passed to the strength check.</param>
/// <param name="statusCounter">Tallies updated according to the entry's fate.</param>
/// <param name="edgeThreshold">Forwarded to ShouldRemoveVariant.</param>
/// <param name="allowRescue">When false the strength check is skipped and the entry is never treated as strong.</param>
/// <param name="indels">The indel(s) that make up this entry.</param>
/// <returns>The annotated indels to keep, or null when the entry is filtered out.</returns>
private List<PreIndel> ExtractIndelsFromEntry(IndelEvidence indelMetrics, string keyString, IndelStatusCounter statusCounter, double edgeThreshold, bool allowRescue, List<PreIndel> indels)
{
    var kept = new List<PreIndel>();

    // Raw totals
    var observationCount = indelMetrics.Observations;
    var anchorLeft = indelMetrics.LeftAnchor;
    var anchorRight = indelMetrics.RightAnchor;
    var mess = indelMetrics.Mess;
    var numFromUnanchoredRepeat = indelMetrics.IsRepeat;
    var numFromMateUnmapped = indelMetrics.IsSplit;

    // Per-observation fractions and averages
    var fwdSupport = indelMetrics.Forward / (float)observationCount;
    var reverseSupport = indelMetrics.Reverse / (float)observationCount;
    var stitchedSupport = indelMetrics.Stitched / (float)observationCount;
    var reputableSupportFraction = indelMetrics.ReputableSupport / (float)observationCount;
    var avgAnchorLeft = anchorLeft / (float)observationCount;
    var avgAnchorRight = anchorRight / (float)observationCount;
    var avgQuals = indelMetrics.Quality / (float)observationCount;
    var avgMess = mess / (float)observationCount;

    // TODO clean this up, no more magic
    var isStrong = allowRescue
        && IsStrong(avgQuals, reputableSupportFraction, avgAnchorLeft, avgMess, avgAnchorRight, reverseSupport, observationCount, fwdSupport, keyString, stitchedSupport);

    if (indels.Count == 2)
    {
        var first = GetIndelFromEntry(indels[0], anchorLeft, anchorRight, observationCount, mess, fwdSupport, reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport, numFromMateUnmapped, numFromUnanchoredRepeat);
        var second = GetIndelFromEntry(indels[1], anchorLeft, anchorRight, observationCount, mess, fwdSupport, reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport, numFromMateUnmapped, numFromUnanchoredRepeat);

        // Cross-link the two halves of the multi-indel.
        first.InMulti = true;
        second.InMulti = true;
        first.OtherIndel = Helper.CandidateToString(second);
        second.OtherIndel = Helper.CandidateToString(first);

        kept.Add(first);
        kept.Add(second);
    }
    else if (indels.Count < 2)
    {
        var only = GetIndelFromEntry(indels[0], anchorLeft, anchorRight, observationCount, mess, fwdSupport, reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport, numFromMateUnmapped, numFromUnanchoredRepeat);
        kept.Add(only);
    }
    else
    {
        Logger.WriteToLog(
            $"Can't support more than 2 indels in one read: ignoring multi-indel {keyString} (seen {observationCount} times)");
    }

    // Lone single-base indels need a bit more support than everything else.
    var hasTooFewObservations = observationCount < _foundThreshold * 0.8 || observationCount <= 2;
    if (indels.Count == 1 && kept[0].Length == 1 && hasTooFewObservations)
    {
        indelMetrics.Outcome = Outcome.SuperWeakSmall;
        return null;
    }

    var remove = ShouldRemoveVariant(observationCount, avgAnchorLeft, avgAnchorRight, isStrong, statusCounter, avgQuals, avgMess, anchorLeft, anchorRight, edgeThreshold, indelMetrics);
    if (remove)
    {
        return null;
    }

    statusCounter.Kept++;
    return kept;
}
/// <summary>
/// Exercises BasicIndelFilterer.GetRealignablePreIndels over one fixed set of five indels while
/// varying the support threshold, the anchor threshold, rescue, and the strict found threshold,
/// and checks how many indels come back each time. Finishes with a multi-indel key.
/// </summary>
public void GetRealignablePreIndels()
{
    // Good support, good anchors, good direction balance, low mess
    var goodEvidence = new IndelEvidence()
    {
        Observations = 10, LeftAnchor = 500, RightAnchor = 500, Mess = 3, Quality = 300,
        Forward = 3, Reverse = 3, Stitched = 4, ReputableSupport = 5, IsRepeat = 0, IsSplit = 0
    };

    // Bad left anchor
    var badLeftAnchor = new IndelEvidence()
    {
        Observations = 10, LeftAnchor = 100, RightAnchor = 900, Mess = 3, Quality = 300,
        Forward = 3, Reverse = 3, Stitched = 4, ReputableSupport = 5, IsRepeat = 0, IsSplit = 0
    };

    // Bad right anchor
    var badRightAnchor = new IndelEvidence()
    {
        Observations = 10, LeftAnchor = 900, RightAnchor = 100, Mess = 3, Quality = 300,
        Forward = 3, Reverse = 3, Stitched = 4, ReputableSupport = 5, IsRepeat = 0, IsSplit = 0
    };

    // Support too low
    var supportTooLow = new IndelEvidence()
    {
        Observations = 4, LeftAnchor = 200, RightAnchor = 200, Mess = 0, Quality = 240,
        Forward = 1, Reverse = 1, Stitched = 2, ReputableSupport = 4, IsRepeat = 0, IsSplit = 0
    };

    // Support too low, and messy as well
    var supportTooLowAndIsMess = new IndelEvidence()
    {
        Observations = 4, LeftAnchor = 200, RightAnchor = 200, Mess = 3, Quality = 240,
        Forward = 1, Reverse = 1, Stitched = 2, ReputableSupport = 4, IsRepeat = 0, IsSplit = 0
    };

    // Every scenario below starts from the same five entries. Each one gets its own copy of
    // this template so a scenario never sees a dictionary that an earlier call was handed.
    var allIndels = new Dictionary<string, IndelEvidence>()
    {
        { "chr1:123 A>ATG", goodEvidence },
        { "chr1:123 A>ATGC", badLeftAnchor },
        { "chr2:123 ATG>A", badRightAnchor },
        { "chr3:123 A>ATG", supportTooLow },
        { "chr4:123 A>ATG", supportTooLowAndIsMess },
    };

    // No thresholds: everything comes back
    var filterer = new BasicIndelFilterer(0, 0, false);
    var indelsDict = new Dictionary<string, IndelEvidence>(allIndels);
    var realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
    var indels = realignableIndels.SelectMany(x => x.Value);
    Assert.Equal(5, indels.Count());

    // Filter by support only
    indelsDict = new Dictionary<string, IndelEvidence>(allIndels);
    filterer = new BasicIndelFilterer(5, 0, false);
    realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
    indels = realignableIndels.SelectMany(x => x.Value);
    Assert.Equal(3, indels.Count());

    // Filter by anchor only
    // Note, by default we throw out anything with 0 observations (what does that even mean?)
    // Should keep chr1:123 A>ATG, chr3:123 A>ATG and chr4:123 A>ATG
    indelsDict = new Dictionary<string, IndelEvidence>(allIndels);
    filterer = new BasicIndelFilterer(0, 20, false);
    realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
    indels = realignableIndels.SelectMany(x => x.Value);
    Assert.Equal(3, indels.Count());

    // Filter by anchor and support
    indelsDict = new Dictionary<string, IndelEvidence>(allIndels);
    filterer = new BasicIndelFilterer(5, 20, false);
    realignableIndels = filterer.GetRealignablePreIndels(indelsDict, false);
    indels = realignableIndels.SelectMany(x => x.Value);
    Assert.Single(indels);

    // Same filterer with rescue enabled: one additional indel that misses the thresholds
    // is let through (presumably the clean low-support one - the test only checks the count).
    indelsDict = new Dictionary<string, IndelEvidence>(allIndels);
    realignableIndels = filterer.GetRealignablePreIndels(indelsDict, true);
    indels = realignableIndels.SelectMany(x => x.Value);
    Assert.Equal(2, indels.Count());

    // Don't rescue stuff that falls below required minimum
    indelsDict = new Dictionary<string, IndelEvidence>(allIndels);
    filterer = new BasicIndelFilterer(5, 20, false, strictFoundThreshold: 5);
    realignableIndels = filterer.GetRealignablePreIndels(indelsDict, true);
    indels = realignableIndels.SelectMany(x => x.Value);
    // Counts are ints: assert them as such instead of comparing against 1.0.
    Assert.Single(indels);

    // Multis: one multi-indel key yields both of its component indels
    indelsDict = new Dictionary<string, IndelEvidence>()
    {
        { "chr1:123 A>ATG|chr1:140 C>CTG", goodEvidence },
    };
    filterer = new BasicIndelFilterer(5, 20, false, strictFoundThreshold: 5);
    realignableIndels = filterer.GetRealignablePreIndels(indelsDict, true);
    indels = realignableIndels.SelectMany(x => x.Value);
    Assert.Equal(2, indels.Count());
}