/// <summary>
/// Collects indel evidence from every indel-containing pair in <paramref name="pairs"/> and merges
/// it into <paramref name="indelLookup"/>.
/// </summary>
/// <remarks>
/// Evidence is accumulated in a method-local dictionary first and merged into
/// <paramref name="indelLookup"/> only after all pairs have been visited.
/// For each pair whose <c>HasIndels</c> is set, <c>OriginalIndelsR1</c> and <c>OriginalIndelsR2</c>
/// are reset to empty lists. Each one is then replaced with the helper's result when the pair's
/// classification passes <c>ClassificationContainsQualityIndels</c> and the corresponding read is
/// non-null, mapped and has a non-negative <c>RefID</c>.
/// </remarks>
/// <param name="targetFinder">Forwarded unchanged to <c>IndelEvidenceHelper.FindIndelsAndRecordEvidence</c>.</param>
/// <param name="chrom">Chromosome name forwarded to the evidence helper.</param>
/// <param name="indelLookup">Shared lookup that receives the merged evidence.</param>
/// <param name="pairs">Pairs to inspect; their <c>OriginalIndelsR1</c>/<c>OriginalIndelsR2</c> are overwritten in place.</param>
/// <returns>The same <paramref name="pairs"/> array that was passed in.</returns>
public static PairResult[] CollectIndelEvidence(IndelTargetFinder targetFinder, string chrom,
    ConcurrentDictionary<string, IndelEvidence> indelLookup, PairResult[] pairs)
{
    // Loop-invariant threshold handed to the evidence helper for every read.
    const int minMapQualityForEvidence = 10;

    var localResult = new Dictionary<string, IndelEvidence>();

    foreach (var p in pairs)
    {
        if (!p.HasIndels)
        {
            continue;
        }

        // Reputable indel reads will not have lots of mismatches or softclips
        p.OriginalIndelsR1 = new List<PreIndel>();
        p.OriginalIndelsR2 = new List<PreIndel>();

        var classification = p.Classification;

        // TODO would anything ever be coming through here that is _not_ ClassificationContainsQualityIndels?
        if (!ClassificationContainsQualityIndels(classification))
        {
            // Neither read can contribute evidence; the lists stay empty.
            continue;
        }

        var stitched = classification == PairClassification.ImperfectStitched
                       || classification == PairClassification.MessyStitched
                       || classification == PairClassification.PerfectStitched
                       || classification == PairClassification.SingleMismatchStitched;

        // NOTE(review): the original comment here said "Assumes we're not ever dealing with stitched
        // reads here", yet the stitched flag above is forwarded to the helper - confirm which holds.
        var read1 = p.ReadPair.Read1;
        if (read1 != null && read1.IsMapped() && read1.RefID >= 0)
        {
            p.OriginalIndelsR1 = IndelEvidenceHelper.FindIndelsAndRecordEvidence(read1, targetFinder,
                localResult, p.IsReputableIndelContaining, chrom, minMapQualityForEvidence, stitched);
        }

        var read2 = p.ReadPair.Read2;
        if (read2 != null && read2.IsMapped() && read2.RefID >= 0)
        {
            p.OriginalIndelsR2 = IndelEvidenceHelper.FindIndelsAndRecordEvidence(read2, targetFinder,
                localResult, p.IsReputableIndelContaining, chrom, minMapQualityForEvidence, stitched);
        }
    }

    // Merge the locally gathered evidence into the shared lookup: new keys are inserted as-is,
    // existing keys have the local evidence folded into the instance that is already stored.
    // NOTE(review): the update delegate mutates the stored instance in place, and
    // ConcurrentDictionary invokes that delegate outside its internal lock. Confirm that callers
    // merging the same key concurrently cannot race inside AddIndelEvidence.
    foreach (var kvp in localResult)
    {
        var incoming = kvp.Value;
        indelLookup.AddOrUpdate(kvp.Key, incoming, (key, existing) =>
        {
            existing.AddIndelEvidence(incoming);
            return existing;
        });
    }

    return pairs;
}
/// <summary>
/// Records the indel evidence found in <paramref name="alignment"/> by delegating to
/// <c>IndelEvidenceHelper.FindIndelsAndRecordEvidence</c> with this instance's target finder and lookup.
/// </summary>
/// <param name="alignment">Alignment to scan; passed straight through to the helper.</param>
/// <param name="isReputable">Reputability flag forwarded to the helper.</param>
/// <param name="isStitched">Stitched flag forwarded to the helper.</param>
/// <param name="chromosome">Chromosome name forwarded to the helper.</param>
/// <remarks>Whatever the helper returns is discarded; only its effect on the lookup is kept.</remarks>
public void CollectEvidence(BamAlignment alignment, bool isReputable, bool isStitched, string chromosome)
{
    // Fixed value supplied as the helper's sixth argument.
    // NOTE(review): presumably a minimum mapping quality threshold - confirm against IndelEvidenceHelper.
    const int helperThreshold = 30;

    IndelEvidenceHelper.FindIndelsAndRecordEvidence(
        alignment,
        _targetFinder,
        _lookup,
        isReputable,
        chromosome,
        helperThreshold,
        isStitched);
}
/// <summary>
/// Feeds a series of reads through <c>IndelEvidenceHelper.FindIndelsAndRecordEvidence</c> against one
/// shared lookup and checks, after every call, both the set of keys and the accumulated evidence.
/// </summary>
/// <remarks>
/// The steps are cumulative: each expectation includes the contributions of all earlier calls,
/// so the order of the calls below must not be changed.
/// </remarks>
public void FindIndelsAndRecordEvidence()
{
    var readPair = TestHelpers.GetPair("5M1D5M", "5M2I4M", nm2: 3);
    var readPair2 = TestHelpers.GetPair("3M1D8M", "5M1D5M", nm2: 4);
    var targetFinder = new IndelTargetFinder();
    var lookup = new Dictionary<string, IndelEvidence>();

    var expectedDel = "chr1:104 NN>N";
    var expectedIns = "chr1:104 N>NTT";

    // Step 1: first observation of the deletion (forward, reputable, not stitched).
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10);

    Assert.Single(lookup);
    Assert.Equal(expectedDel, lookup.Keys.First());
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 1,
            LeftAnchor = 5,
            RightAnchor = 5,
            Mess = 0,
            Quality = 30,
            Forward = 1,
            Reverse = 0,
            Stitched = 0,
            ReputableSupport = 1,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedDel]);

    // Step 2: build evidence for the same indel, let's call it stitched this time.
    // Expected: the Stitched counter goes up while Forward stays at 1.
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10, true);

    Assert.Single(lookup);
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 2,
            LeftAnchor = 10,
            RightAnchor = 10,
            Mess = 0,
            Quality = 60,
            Forward = 1,
            Reverse = 0,
            Stitched = 1,
            ReputableSupport = 2,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedDel]);

    // Step 3: same indel from a different read; this one's not reputable and is reverse.
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair2.Read2, targetFinder, lookup, false, "chr1", 10);

    Assert.Single(lookup);
    Assert.Contains(expectedDel, lookup.Keys);
    // Mess should subtract the deletion length from nm (nm2 = 4, 1-base deletion -> 3).
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 3,
            LeftAnchor = 15,
            RightAnchor = 15,
            Mess = 3,
            Quality = 90,
            Forward = 1,
            Reverse = 1,
            Stitched = 1,
            ReputableSupport = 2,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedDel]);

    // Step 4: different indel (the insertion), reverse only.
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read2, targetFinder, lookup, true, "chr1", 10);

    Assert.Equal(2, lookup.Count);
    // Original del shouldn't have changed.
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 3,
            LeftAnchor = 15,
            RightAnchor = 15,
            Mess = 3,
            Quality = 90,
            Forward = 1,
            Reverse = 1,
            Stitched = 1,
            ReputableSupport = 2,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedDel]);
    Assert.Contains(expectedIns, lookup.Keys);
    // Mess should subtract the insertion length from nm (nm2 = 3, 2-base insertion -> 1).
    ValidateEvidenceMatches(
        new IndelEvidence
        {
            Observations = 1,
            LeftAnchor = 5,
            RightAnchor = 4,
            Mess = 1,
            Quality = 30,
            Forward = 0,
            Reverse = 1,
            Stitched = 0,
            ReputableSupport = 1,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedIns]);

    // Step 5: multi-indel - two deletions close together are expected under one combined key only.
    var readPairMulti = TestHelpers.GetPair("5M1D1M1D4M", "5M1D1M1D4M", nm: 2, nm2: 2);
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMulti.Read1, targetFinder, lookup, true, "chr1", 10);

    Assert.Equal(3, lookup.Count);
    // Original del shouldn't have changed.
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 3,
            LeftAnchor = 15,
            RightAnchor = 15,
            Mess = 3,
            Quality = 90,
            Forward = 1,
            Reverse = 1,
            Stitched = 1,
            ReputableSupport = 2,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedDel]);
    var expectedMulti = "chr1:104 NN>N|chr1:106 NN>N";
    Assert.Contains(expectedMulti, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 1,
            LeftAnchor = 5,
            RightAnchor = 4,
            Mess = 0,
            Quality = 30,
            Forward = 1,
            Reverse = 0,
            Stitched = 0,
            ReputableSupport = 1,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedMulti]);

    // Step 6: multi that are far apart - allow to track individually too.
    // Expected: the combined key plus both single-deletion keys, so the first deletion gains an observation.
    var readPairMultiFar = TestHelpers.GetPair("5M1D26M1D4M", "5M1D26M1D4M", nm: 2, nm2: 2);
    IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMultiFar.Read1, targetFinder, lookup, true, "chr1", 10);

    Assert.Equal(5, lookup.Count);
    // Original del picks up the far-apart read's first deletion.
    Assert.Contains(expectedDel, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 4,
            LeftAnchor = 20,
            RightAnchor = 41,
            Mess = 4,
            Quality = 120,
            Forward = 2,
            Reverse = 1,
            Stitched = 1,
            ReputableSupport = 3,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedDel]);
    var expectedMultiFar = "chr1:104 NN>N|chr1:131 NN>N";
    Assert.Contains(expectedMultiFar, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 1,
            LeftAnchor = 5,
            RightAnchor = 4,
            Mess = 0,
            Quality = 30,
            Forward = 1,
            Reverse = 0,
            Stitched = 0,
            ReputableSupport = 1,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedMultiFar]);
    var expectedSecondSingleFromMulti = "chr1:131 NN>N";
    Assert.Contains(expectedSecondSingleFromMulti, lookup.Keys);
    ValidateEvidenceMatches(
        new IndelEvidence()
        {
            Observations = 1,
            LeftAnchor = 26,
            RightAnchor = 4,
            Mess = 1,
            Quality = 30,
            Forward = 1,
            Reverse = 0,
            Stitched = 0,
            ReputableSupport = 1,
            IsRepeat = 0,
            IsSplit = 0
        },
        lookup[expectedSecondSingleFromMulti]);
}