/// <summary>
/// Walks <paramref name="pairs"/>, records indel evidence for each mapped read of every pair that
/// reports indels, and then folds the evidence gathered by this call into <paramref name="indelLookup"/>.
/// </summary>
/// <param name="targetFinder">Passed through to <c>IndelEvidenceHelper.FindIndelsAndRecordEvidence</c>.</param>
/// <param name="chrom">Chromosome name passed through to the evidence helper.</param>
/// <param name="indelLookup">Shared lookup that receives the evidence accumulated by this call.</param>
/// <param name="pairs">Pairs to inspect. For pairs with indels, <c>OriginalIndelsR1</c>/<c>OriginalIndelsR2</c> are overwritten.</param>
/// <returns>The same <paramref name="pairs"/> array that was passed in.</returns>
public static PairResult[] CollectIndelEvidence(IndelTargetFinder targetFinder, string chrom,
                                                        ConcurrentDictionary <string, IndelEvidence> indelLookup, PairResult[] pairs)
        {
            // Value handed to the evidence helper for every read processed here.
            const int minMapQualityForEvidence = 10;

            // Evidence is accumulated privately first and merged into the shared lookup once, at the end.
            var evidenceFromThisCall = new Dictionary<string, IndelEvidence>();

            foreach (var pair in pairs)
            {
                var classification = pair.Classification;
                if (!pair.HasIndels)
                {
                    continue;
                }

                // Start from empty lists; they are only replaced below when a read qualifies.
                pair.OriginalIndelsR1 = new List<PreIndel>();
                pair.OriginalIndelsR2 = new List<PreIndel>();

                var isStitched = classification == PairClassification.PerfectStitched ||
                                 classification == PairClassification.SingleMismatchStitched ||
                                 classification == PairClassification.ImperfectStitched ||
                                 classification == PairClassification.MessyStitched;

                // Note carried over from the original author: this code assumes stitched reads are never
                // actually handled here, and asks (TODO) whether anything reaching this point could ever
                // fail ClassificationContainsQualityIndels.
                var read1 = pair.ReadPair.Read1;
                if (read1 != null && read1.IsMapped() && read1.RefID >= 0 &&
                    ClassificationContainsQualityIndels(classification))
                {
                    pair.OriginalIndelsR1 = IndelEvidenceHelper.FindIndelsAndRecordEvidence(
                        read1, targetFinder, evidenceFromThisCall, pair.IsReputableIndelContaining,
                        chrom, minMapQualityForEvidence, isStitched);
                }

                var read2 = pair.ReadPair.Read2;
                if (read2 != null && read2.IsMapped() && read2.RefID >= 0 &&
                    ClassificationContainsQualityIndels(classification))
                {
                    pair.OriginalIndelsR2 = IndelEvidenceHelper.FindIndelsAndRecordEvidence(
                        read2, targetFinder, evidenceFromThisCall, pair.IsReputableIndelContaining,
                        chrom, minMapQualityForEvidence, isStitched);
                }
            }

            // New keys are inserted as-is; for keys already present, the incoming evidence is added onto
            // the stored entry.
            // NOTE(review): the update callback mutates the stored value in place. ConcurrentDictionary
            // does not run that callback under its lock, so this may race if several callers merge the
            // same key at once - confirm whether AddIndelEvidence is safe for that.
            foreach (var entry in evidenceFromThisCall)
            {
                var incoming = entry.Value;
                indelLookup.AddOrUpdate(entry.Key, incoming, (key, existing) =>
                {
                    existing.AddIndelEvidence(incoming);
                    return existing;
                });
            }

            return pairs;
        }
 /// <summary>
 /// Forwards a single alignment to <c>IndelEvidenceHelper.FindIndelsAndRecordEvidence</c>, using this
 /// instance's <c>_targetFinder</c> and <c>_lookup</c>. The helper's return value is discarded.
 /// </summary>
 /// <param name="alignment">Alignment to inspect.</param>
 /// <param name="isReputable">Passed through to the helper unchanged.</param>
 /// <param name="isStitched">Passed through to the helper unchanged.</param>
 /// <param name="chromosome">Chromosome name passed through to the helper.</param>
 public void CollectEvidence(BamAlignment alignment, bool isReputable, bool isStitched, string chromosome)
 {
     // Fixed threshold handed to the helper; presumably a minimum mapping quality -
     // TODO confirm against the FindIndelsAndRecordEvidence signature.
     const int evidenceThreshold = 30;

     IndelEvidenceHelper.FindIndelsAndRecordEvidence(
         alignment,
         _targetFinder,
         _lookup,
         isReputable,
         chromosome,
         evidenceThreshold,
         isStitched);
 }
Exemplo n.º 3
0
        /// <summary>
        /// Exercises <c>IndelEvidenceHelper.FindIndelsAndRecordEvidence</c> against one shared lookup:
        /// repeated observations of the same deletion (plain, stitched, non-reputable reverse),
        /// a different insertion, a read with two nearby deletions, and a read with two distant deletions.
        /// After each call the number of lookup entries and the accumulated evidence are checked.
        /// </summary>
        public void FindIndelsAndRecordEvidence()
        {
            var readPair     = TestHelpers.GetPair("5M1D5M", "5M2I4M", nm2: 3);
            var readPair2    = TestHelpers.GetPair("3M1D8M", "5M1D5M", nm2: 4);
            var targetFinder = new IndelTargetFinder();
            var lookup       = new Dictionary <string, IndelEvidence>();

            var expectedDel = "chr1:104 NN>N";
            var expectedIns = "chr1:104 N>NTT";

            // First observation of the deletion: forward, reputable, not stitched
            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10);

            Assert.Equal(1, lookup.Count);
            Assert.Equal(expectedDel, lookup.Keys.First());

            //obs,left,right,mess,quals,fwd,reverse,stitched,reput
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 5,
                Mess             = 0,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedDel]);

            // Build evidence for same indel, let's call it stitched this time
            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read1, targetFinder, lookup, true, "chr1", 10, true);
            Assert.Equal(1, lookup.Count);
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 2,
                LeftAnchor       = 10,
                RightAnchor      = 10,
                Mess             = 0,
                Quality          = 60,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 1,
                ReputableSupport = 2,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedDel]);

            // Build evidence for same indel from a different read, this one's not reputable and is reverse
            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair2.Read2, targetFinder, lookup, false, "chr1", 10);
            Assert.Equal(1, lookup.Count);
            Assert.Contains(expectedDel, lookup.Keys);

            // This read carries only a deletion. The expected Mess of 3 is consistent with nm2 (4)
            // minus the 1-base deletion, i.e. the indel's own length is not counted as mess.
            // The deletion is not touched by the insertion / multi-indel reads below, so this same
            // expectation is re-checked after each of them.
            var delAfterThreeObservations = new IndelEvidence()
            {
                Observations     = 3,
                LeftAnchor       = 15,
                RightAnchor      = 15,
                Mess             = 3,
                Quality          = 90,
                Forward          = 1,
                Reverse          = 1,
                Stitched         = 1,
                ReputableSupport = 2,
                IsRepeat         = 0,
                IsSplit          = 0
            };
            ValidateEvidenceMatches(delAfterThreeObservations, lookup[expectedDel]);

            // Different indel, reverse only
            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPair.Read2, targetFinder, lookup, true, "chr1", 10);
            Assert.Equal(2, lookup.Count);
            // Original del shouldn't have changed
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(delAfterThreeObservations, lookup[expectedDel]);

            Assert.Contains(expectedIns, lookup.Keys);
            // mess should subtract ins length from nm (nm2 of 3 minus the 2-base insertion)
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 4,
                Mess             = 1,
                Quality          = 30,
                Forward          = 0,
                Reverse          = 1,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedIns]);

            // Multi-indel
            var readPairMulti = TestHelpers.GetPair("5M1D1M1D4M", "5M1D1M1D4M", nm: 2, nm2: 2);
            var expectedMulti = "chr1:104 NN>N|chr1:106 NN>N";

            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMulti.Read1, targetFinder, lookup, true, "chr1", 10);
            Assert.Equal(3, lookup.Count);
            // Original del shouldn't have changed
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(delAfterThreeObservations, lookup[expectedDel]);

            Assert.Contains(expectedMulti, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 4,
                Mess             = 0,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedMulti]);

            // Multi that are far apart - allow to track individually too.
            var readPairMultiFar              = TestHelpers.GetPair("5M1D26M1D4M", "5M1D26M1D4M", nm: 2, nm2: 2);
            var expectedMultiFar              = "chr1:104 NN>N|chr1:131 NN>N";
            var expectedSecondSingleFromMulti = "chr1:131 NN>N";

            IndelEvidenceHelper.FindIndelsAndRecordEvidence(readPairMultiFar.Read1, targetFinder, lookup, true, "chr1", 10);
            // Two new entries: the combined multi key plus the second deletion on its own
            Assert.Equal(5, lookup.Count);

            // This time the original del gains an observation, because the first deletion is also tracked individually
            Assert.Contains(expectedDel, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 4,
                LeftAnchor       = 20,
                RightAnchor      = 41,
                Mess             = 4,
                Quality          = 120,
                Forward          = 2,
                Reverse          = 1,
                Stitched         = 1,
                ReputableSupport = 3,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedDel]);

            Assert.Contains(expectedMultiFar, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 5,
                RightAnchor      = 4,
                Mess             = 0,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedMultiFar]);

            Assert.Contains(expectedSecondSingleFromMulti, lookup.Keys);
            ValidateEvidenceMatches(
                new IndelEvidence()
            {
                Observations     = 1,
                LeftAnchor       = 26,
                RightAnchor      = 4,
                Mess             = 1,
                Quality          = 30,
                Forward          = 1,
                Reverse          = 0,
                Stitched         = 0,
                ReputableSupport = 1,
                IsRepeat         = 0,
                IsSplit          = 0
            }, lookup[expectedSecondSingleFromMulti]);
        }