Beispiel #1
0
        // TODO I strongly advise we remove this logic altogether. It is brittle, incomplete, and hasn't been shown to reliably improve results.
        /// <summary>
        /// Builds a placeholder merged read for a read pair: every base is 'N' and every quality is 0.
        /// The read spans from read 1's clip-adjusted start to the later of the two reads'
        /// insertion-adjusted ends (clip-adjusted end plus trailing insertion length).
        /// </summary>
        /// <param name="set">Alignment set supplying <c>PartnerRead1</c> and <c>PartnerRead2</c>; both are dereferenced.</param>
        /// <param name="useSoftclippedBases">
        /// When true, read 2's clip-adjusted position marks where the stitched region starts;
        /// otherwise read 2's <c>Position</c> is used.
        /// </param>
        /// <returns>
        /// A new read on read 1's chromosome, named after read 1, whose cigar has the shape
        /// <c>[prefix]S [n]M [suffix]S</c> and whose directions have the shape
        /// <c>[before]F|R [n]S [after]F|R</c> (zero-length segments are omitted).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the prefix and suffix softclips together cover the whole merged length or more.
        /// </exception>
        public Read GenerateNifiedMergedRead(AlignmentSet set, bool useSoftclippedBases)
        {
            var first  = set.PartnerRead1;
            var second = set.PartnerRead2;

            // Right-hand extent of each read, pushed out by any insertion at the tail of its cigar.
            var firstEnd  = first.ClipAdjustedEndPosition + first.CigarData.GetSuffixInsertionLength();
            var secondEnd = second.ClipAdjustedEndPosition + second.CigarData.GetSuffixInsertionLength();

            var firstEndsLater = firstEnd > secondEnd;

            // Inclusive span from read 1's clip-adjusted start to the furthest right end.
            var totalLength = Math.Max(firstEnd, secondEnd) + 1 - first.ClipAdjustedPosition;

            // The prefix clip always comes from read 1. The suffix clip comes from whichever read
            // reaches further right; on a tie, the smaller of the two suffix clips is used.
            var leadingClip = first.CigarData.GetPrefixClip();
            var trailingClip = firstEnd == secondEnd
                ? Math.Min(first.CigarData.GetSuffixClip(), second.CigarData.GetSuffixClip())
                : (firstEndsLater ? first.CigarData.GetSuffixClip() : second.CigarData.GetSuffixClip());

            if (leadingClip + trailingClip >= totalLength)
            {
                throw new ArgumentException($"Reads cannnot be Nified using this simple algorithm. The prefix and suffix sofctlips overlap for reads: {set.PartnerRead1.Name} {set.PartnerRead1.Position}:{set.PartnerRead1.CigarData} and {set.PartnerRead2.Position}:{set.PartnerRead2.CigarData}");
            }

            // Cigar text: optional leading S, a single M block, optional trailing S.
            var cigarText = $"{totalLength - leadingClip - trailingClip}M";
            if (leadingClip > 0)
            {
                cigarText = $"{leadingClip}S" + cigarText;
            }
            if (trailingClip > 0)
            {
                cigarText += $"{trailingClip}S";
            }
            var placeholderCigar = new CigarAlignment(cigarText);

            // Bases before read 2 starts and bases after the shorter read ends are covered by one read only.
            var secondStart        = useSoftclippedBases ? second.ClipAdjustedPosition : second.Position;
            var leadingUnstitched  = secondStart - first.ClipAdjustedPosition;
            var trailingUnstitched = firstEndsLater ? firstEnd - secondEnd : secondEnd - firstEnd;

            // Read 1's direction is taken from its first sequenced base; the opposite code is used for read 2.
            var firstIsForward = first.SequencedBaseDirectionMap.First() == DirectionType.Forward;
            var firstCode      = firstIsForward ? "F" : "R";
            var secondCode     = firstIsForward ? "R" : "F";
            var trailingCode   = firstEndsLater ? firstCode : secondCode;

            // Direction text: optional leading single-read run, the stitched run, optional trailing single-read run.
            var directionText = $"{totalLength - leadingUnstitched - trailingUnstitched}S";
            if (leadingUnstitched > 0)
            {
                directionText = $"{leadingUnstitched}{firstCode}" + directionText;
            }
            if (trailingUnstitched > 0)
            {
                directionText += $"{trailingUnstitched}{trailingCode}";
            }

            var placeholderAlignment = new BamAlignment
            {
                Name      = first.Name,
                Bases     = new string('N', totalLength),
                // Smaller of the two read positions, minus one.
                // NOTE(review): presumably converts Read.Position to BamAlignment's coordinate base - confirm.
                Position  = Math.Min(first.Position - 1, second.Position - 1),
                // A freshly allocated byte array is all zeros, i.e. quality 0 for every base.
                Qualities = new byte[totalLength],
                CigarData = placeholderCigar
            };

            return new Read(first.Chromosome, placeholderAlignment)
            {
                StitchedCigar   = placeholderCigar,
                CigarDirections = new CigarDirection(directionText)
            };
        }
Beispiel #2
0
        public void TryStitch_NoXC_Unstitchable()
        {
            // Walks through read pairs that must NOT be stitched. For each pair the test checks that
            // TryStitch leaves two reads in ReadsForProcessing, then runs the shared
            // StitcherTestHelpers.TestUnstitchableReads check, whose callback expects to find each
            // original read exactly once.

            // Read 1 for the first five scenarios: 8M at chr1:12345.
            var anchorRead = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12345,
                new CigarAlignment("8M"), qualityForAll: 30);

            // Single base at chr1:2384, nowhere near the anchor.
            var distantMate = DomainTestHelper.CreateRead("chr1", "A", 2384,
                new CigarAlignment("1M"), qualityForAll: 30);

            // Positioned inside the anchor's span, but its cigar opens with an insertion.
            var clashingMate = DomainTestHelper.CreateRead("chr1", "ATCGTT", 12349,
                new CigarAlignment("1I5M"), qualityForAll: 30);

            // Same coordinates as an overlapping read would have, but on chr2.
            var otherChromosomeMate = DomainTestHelper.CreateRead("chr2", "ATCGTT", 12349,
                new CigarAlignment("6M"), qualityForAll: 30);

            // Two bases at 12343-12344: ends immediately before the anchor starts.
            var adjacentMate = DomainTestHelper.CreateRead("chr1", "AT", 12343,
                new CigarAlignment("2M"), qualityForAll: 30);

            var stitcherUnderTest = StitcherTestHelpers.GetStitcher(10);

            // --- Scenario: read 2 is missing ----------------------------------------------------
            // (Read 1 cannot be the missing one; per the original note, an alignment set could not
            // be created without it.) Stitching must throw.
            var incompletePair = new AlignmentSet(anchorRead, null);

            Assert.Throws<ArgumentException>(() => stitcherUnderTest.TryStitch(incompletePair));

            // --- Scenario: no overlap, reads far apart ------------------------------------------
            var farApartPair = new AlignmentSet(anchorRead, distantMate);
            stitcherUnderTest.TryStitch(farApartPair);
            Assert.Equal(2, farApartPair.ReadsForProcessing.Count);
            StitcherTestHelpers.TestUnstitchableReads(anchorRead, distantMate, 0, leftovers =>
            {
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(distantMate, r)));
            });

            // --- Scenario: no overlap, reads directly neighboring -------------------------------
            var borderingPair = new AlignmentSet(anchorRead, adjacentMate);
            stitcherUnderTest.TryStitch(borderingPair);
            Assert.Equal(2, borderingPair.ReadsForProcessing.Count);
            StitcherTestHelpers.TestUnstitchableReads(anchorRead, adjacentMate, 0, leftovers =>
            {
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(adjacentMate, r)));
            });

            // --- Scenario: reads on different chromosomes ---------------------------------------
            var crossChromosomePair = new AlignmentSet(anchorRead, otherChromosomeMate);
            stitcherUnderTest.TryStitch(crossChromosomePair);
            Assert.Equal(2, crossChromosomePair.ReadsForProcessing.Count);
            StitcherTestHelpers.TestUnstitchableReads(anchorRead, otherChromosomeMate, 0, leftovers =>
            {
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(otherChromosomeMate, r)));
            });

            // --- Scenario: reads overlap, but their cigars are incompatible ---------------------
            var incompatiblePair = new AlignmentSet(anchorRead, clashingMate);
            stitcherUnderTest.TryStitch(incompatiblePair);
            Assert.Equal(2, incompatiblePair.ReadsForProcessing.Count);
            StitcherTestHelpers.TestUnstitchableReads(anchorRead, clashingMate, 0, leftovers =>
            {
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(clashingMate, r)));
            });

            // --- Scenario: incompatible cigars, read 2 starts with a softclip -------------------
            // When the overlap consists only of S against I the pair should stitch:
            // 5678----90123456789
            // MMMMIIII
            //     SSSSMMMM
            var insertionRead = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12345,
                new CigarAlignment("4M4I"), qualityForAll: 30);
            var softclippedMate = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12349,
                new CigarAlignment("4S4M"), qualityForAll: 30);

            // The pair is still constructed, but the "should stitch" expectations are currently disabled.
            var softclipOverInsertionPair = new AlignmentSet(insertionRead, softclippedMate);

            //stitcherUnderTest.TryStitch(softclipOverInsertionPair);
            //Assert.Equal(1, softclipOverInsertionPair.ReadsForProcessing.Count);
            //Assert.Equal("4M4I4M", softclipOverInsertionPair.ReadsForProcessing.First().CigarData.ToString());

            // When the overlap mixes S with operations that disagree with the I, the pair must not stitch.
            var softclipDeletionMate = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12348,
                new CigarAlignment("2S1D6M"), qualityForAll: 30);
            var disagreeingPair = new AlignmentSet(insertionRead, softclipDeletionMate);
            stitcherUnderTest.TryStitch(disagreeingPair);
            Assert.Equal(2, disagreeingPair.ReadsForProcessing.Count);
            StitcherTestHelpers.TestUnstitchableReads(insertionRead, softclipDeletionMate, 0, leftovers =>
            {
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(insertionRead, r)));
                Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(softclipDeletionMate, r)));
            });
        }
Beispiel #3
0
        private void ExecuteConsensusTests(bool nifyDisagreements)
        {
            // Checks the consensus base and quality that the stitcher produces inside the overlap of two
            // reads, for several combinations of per-base qualities. With nifyDisagreements set, every
            // disagreeing overlap position is expected to come out as 'N' with quality 0.
            //
            // 1234...   1 - - 2 3 4 5 6 - - 7 8 9 0 //Reference Positions
            // Read1     X X X X X X X X - - - - -
            // Read1     M I I M M M M M - - - - -
            // Read1     T T T T T T T T - - - - -
            // Read2     - - - X X X X X X X X - -
            // Read2     - - - M M M M M I M M - -
            // Read2     - - - A A A A A A A A - -

            const int read1Quality = 30;
            const int read2Quality = 20;

            // Offsets into the merged read (see the "Merged" diagram below).
            var overlapStart  = 3;
            var overlapEnd    = 8;
            var overlapLength = 5;

            var read1 = DomainTestHelper.CreateRead("chr1", "TTTTTTTT", 12341,
                new CigarAlignment("1M2I5M"), qualityForAll: (byte)read1Quality);

            var read2 = DomainTestHelper.CreateRead("chr1", "AAAAAAAA", 12342,
                new CigarAlignment("5M1I2M"), qualityForAll: (byte)read2Quality);

            // NOTE(review): the 10 passed here is presumably the minimum base-call quality that the
            // "below min" / "above min" wording in the scenarios refers to - confirm against GetStitcher.
            var stitcher = StitcherTestHelpers.GetStitcher(10, false, nifyDisagreements: nifyDisagreements);
            var pair     = new AlignmentSet(read1, read2);

            stitcher.TryStitch(pair);

            // Merged    T T T ? ? ? ? ? A A A - -
            // Merged    M I I M M M M M I M M - -
            // Merged    0 1 2 3 4 5 6 7 8 9 0 1 2

            var merged = StitcherTestHelpers.GetMergedRead(pair);

            // Before the overlap: everything comes from read 1.
            Assert.Equal("TTT", merged.Sequence.Substring(0, overlapStart));

            // After the overlap: everything comes from read 2.
            Assert.Equal("AAA", merged.Sequence.Substring(overlapEnd, 3));

            // Inside the overlap both reads disagree with qualities 30 vs 20:
            // N when nifying, otherwise read 1's bases.
            var expectedOverlap = nifyDisagreements ? "NNNNN" : "TTTTT";
            Assert.Equal(expectedOverlap, merged.Sequence.Substring(overlapStart, overlapLength));

            // Qualities: read 1's before the overlap, 0 inside it, read 2's after it.
            Assert.True(merged.Qualities.Take(overlapStart).All(q => q == read1Quality));
            Assert.True(merged.Qualities.Skip(overlapStart).Take(overlapLength).All(q => q == 0));
            Assert.True(merged.Qualities.Skip(overlapEnd).Take(merged.Sequence.Length - overlapEnd).All(q => q == read2Quality));

            // ---- When one of the two disagreeing bases is low quality, the higher-quality base wins ----

            // Read 2 wins across the whole overlap.
            read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 5, 5, 5, 5 };
            read2.BamAlignment.Qualities = new byte[] { 40, 40, 40, 40, 40, 20, 19, 18 };
            pair = new AlignmentSet(read1, read2);
            stitcher.TryStitch(pair);
            merged = StitcherTestHelpers.GetMergedRead(pair);
            expectedOverlap = nifyDisagreements ? "NNNNN" : read2.Sequence.Substring(0, overlapLength);
            var expectedSequence = nifyDisagreements ? "TTTNNNNNAAA" : "TTTAAAAAAAA";
            var expectedQualities = nifyDisagreements
                ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
                : new byte[] { 30, 30, 30, 40, 40, 40, 40, 40, 20, 19, 18 };
            Assert.Equal(expectedOverlap, merged.Sequence.Substring(overlapStart, overlapLength));
            Assert.Equal(expectedSequence, merged.Sequence);
            StitcherTestHelpers.CompareQuality(expectedQualities, merged.Qualities);

            // Read 1 wins across the whole overlap.
            read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 40, 40, 40, 40, 40 };
            read2.BamAlignment.Qualities = new byte[] { 5, 5, 5, 5, 5, 20, 19, 18 };
            pair = new AlignmentSet(read1, read2);
            stitcher.TryStitch(pair);
            merged = StitcherTestHelpers.GetMergedRead(pair);
            expectedOverlap = nifyDisagreements ? "NNNNN" : read1.Sequence.Substring(overlapStart, overlapLength);
            expectedSequence = nifyDisagreements ? "TTTNNNNNAAA" : "TTTTTTTTAAA";
            expectedQualities = nifyDisagreements
                ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
                : new byte[] { 30, 30, 30, 40, 40, 40, 40, 40, 20, 19, 18 };
            Assert.Equal(expectedOverlap, merged.Sequence.Substring(overlapStart, overlapLength));
            Assert.Equal(expectedSequence, merged.Sequence);
            StitcherTestHelpers.CompareQuality(expectedQualities, merged.Qualities);

            // The winner alternates from position to position.
            read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 45, 5, 45, 5 };
            read2.BamAlignment.Qualities = new byte[] { 40, 5, 40, 5, 40, 20, 19, 18 };
            pair = new AlignmentSet(read1, read2);
            stitcher.TryStitch(pair);
            merged = StitcherTestHelpers.GetMergedRead(pair);
            expectedSequence = nifyDisagreements ? "TTTNNNNNAAA" : "TTTATATAAAA";
            expectedQualities = nifyDisagreements
                ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
                : new byte[] { 30, 30, 30, 40, 45, 40, 45, 40, 20, 19, 18 };
            Assert.Equal(expectedSequence, merged.Sequence);
            StitcherTestHelpers.CompareQuality(expectedQualities, merged.Qualities);

            // ---- Agreeing bases: the base is kept regardless of nifyDisagreements ----
            // The expected overlap qualities below are the per-position sums of both reads' qualities
            // (5+40, 45+5, ...).
            var agreeingRead2 = DomainTestHelper.CreateRead("chr1", "TTTTTTTT", 12342,
                new CigarAlignment("5M1I2M"), new byte[] { 40, 5, 40, 5, 40, 20, 19, 18 });

            read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 45, 5, 45, 5 };
            pair = new AlignmentSet(read1, agreeingRead2);
            stitcher.TryStitch(pair);
            merged = StitcherTestHelpers.GetMergedRead(pair);
            Assert.Equal("TTTTTTTTTTT", merged.Sequence);
            StitcherTestHelpers.CompareQuality(new byte[] { 30, 30, 30, 45, 50, 45, 50, 45, 20, 19, 18 }, merged.Qualities);

            // ---- Disagreeing bases where BOTH are low quality ----

            // read1 > read2: base and quality come from read 1.
            read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 8, 8, 8, 8, 8 };
            read2.BamAlignment.Qualities = new byte[] { 5, 5, 5, 5, 5, 20, 19, 18 };
            pair = new AlignmentSet(read1, read2);
            stitcher.TryStitch(pair);
            merged = StitcherTestHelpers.GetMergedRead(pair);
            expectedOverlap = nifyDisagreements ? "NNNNN" : read1.Sequence.Substring(overlapStart, overlapLength);
            expectedSequence = nifyDisagreements ? "TTTNNNNNAAA" : "TTTTTTTTAAA";
            expectedQualities = nifyDisagreements
                ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
                : new byte[] { 30, 30, 30, 8, 8, 8, 8, 8, 20, 19, 18 };
            Assert.Equal(expectedOverlap, merged.Sequence.Substring(overlapStart, overlapLength));
            Assert.Equal(expectedSequence, merged.Sequence);
            StitcherTestHelpers.CompareQuality(expectedQualities, merged.Qualities);

            // read2 > read1: base and quality come from read 2.
            read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 5, 5, 5, 5 };
            read2.BamAlignment.Qualities = new byte[] { 8, 8, 8, 8, 8, 20, 19, 18 };
            pair = new AlignmentSet(read1, read2);
            stitcher.TryStitch(pair);
            merged = StitcherTestHelpers.GetMergedRead(pair);
            expectedOverlap = nifyDisagreements ? "NNNNN" : read2.Sequence.Substring(0, overlapLength);
            expectedSequence = nifyDisagreements ? "TTTNNNNNAAA" : "TTTAAAAAAAA";
            expectedQualities = nifyDisagreements
                ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
                : new byte[] { 30, 30, 30, 8, 8, 8, 8, 8, 20, 19, 18 };
            Assert.Equal(expectedOverlap, merged.Sequence.Substring(overlapStart, overlapLength));
            Assert.Equal(expectedSequence, merged.Sequence);
            StitcherTestHelpers.CompareQuality(expectedQualities, merged.Qualities);

            // read1 == read2: the tie goes to read 1.
            read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 5, 5, 5, 5 };
            read2.BamAlignment.Qualities = new byte[] { 5, 5, 5, 5, 5, 20, 19, 18 };
            pair = new AlignmentSet(read1, read2);
            stitcher.TryStitch(pair);
            merged = StitcherTestHelpers.GetMergedRead(pair);
            expectedOverlap = nifyDisagreements ? "NNNNN" : read1.Sequence.Substring(overlapStart, overlapLength);
            expectedSequence = nifyDisagreements ? "TTTNNNNNAAA" : "TTTTTTTTAAA";
            expectedQualities = nifyDisagreements
                ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
                : new byte[] { 30, 30, 30, 5, 5, 5, 5, 5, 20, 19, 18 };
            Assert.Equal(expectedOverlap, merged.Sequence.Substring(overlapStart, overlapLength));
            Assert.Equal(expectedSequence, merged.Sequence);
            StitcherTestHelpers.CompareQuality(expectedQualities, merged.Qualities);
        }
Beispiel #4
0
        public void TryStitch_ReCo()
        {
            // Regression case taken from a real ReCo read pair for which the stitched cigar used to be
            // generated incorrectly. The same pair is stitched under three stitcher configurations; the
            // merged cigar must be 4S137M4S every time, and only the direction map differs.
            var forwardBases =
                "GTACTCCTACAGTCCCACCCCTCCCCTATAAACCTTATGAATCCCCGTTCACTTAGATGCCAGCTTGGCAAGGAAGGGAAGTACACATCTGTTGACAGTAATGAAATATCCTTGATAAGGATTTAAATTTTGGATGTGCTG";
            var reverseBases =
                "ACCTACAGTCCCACCCCTCCCCTATAAACCTTAGGAATCCCCGTTCACTTAGATGCCAGCTTGGCAAGGAAGGGAAGTACACATCTGTTGACAGTAATGAAATATCCTTGATAAGGATTTAAATTTTGGATGTGCTGAGCT";

            // 8             9
            // 3 4 5 6 7 8 9 0 1 2
            // s s s s s M M M M M ...
            // - - - - M M M M M M ...
            // F F F F R S S S S S ... // Stitched directions if we don't allow softclip to contribute
            // F F F F S S S S S S ... // Stitched directions if we do allow softclip to contribute

            var read1 = DomainTestHelper.CreateRead("chr21", forwardBases, 16685488,
                new CigarAlignment("5S136M"));

            var read2 = DomainTestHelper.CreateRead("chr21", reverseBases, 16685487,
                new CigarAlignment("137M4S"));

            StitcherTestHelpers.SetReadDirections(read2, DirectionType.Reverse);

            // --- Configuration 1: softclipped bases do not contribute ---------------------------
            // The M/S overlap position still yields an M in the cigar, but its direction stays
            // Reverse rather than Stitched.
            var noSoftclipStitcher = new BasicStitcher(10, useSoftclippedBases: false);
            var noSoftclipPair     = new AlignmentSet(read1, read2);

            noSoftclipStitcher.TryStitch(noSoftclipPair);
            var noSoftclipMerged = StitcherTestHelpers.GetMergedRead(noSoftclipPair);

            Assert.Equal("4S137M4S", noSoftclipMerged.CigarData.ToString());
            var noSoftclipDirections = StitcherTestHelpers.BuildDirectionMap(new List<IEnumerable<DirectionType>>
            {
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Forward, 4),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 1),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Stitched, 136),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 4)
            });

            StitcherTestHelpers.VerifyDirectionType(noSoftclipDirections, noSoftclipMerged.CigarDirections.Expand().ToArray());

            // --- Configuration 2: softclipped bases contribute, ignoreProbeSoftclips left at its default ---
            // Same expected directions as configuration 1.
            var softclipStitcher = new BasicStitcher(10, useSoftclippedBases: true);
            var softclipPair     = new AlignmentSet(read1, read2);

            softclipStitcher.TryStitch(softclipPair);
            var softclipMerged = StitcherTestHelpers.GetMergedRead(softclipPair);

            Assert.Equal("4S137M4S", softclipMerged.CigarData.ToString());
            var softclipDirections = StitcherTestHelpers.BuildDirectionMap(new List<IEnumerable<DirectionType>>
            {
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Forward, 4),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 1),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Stitched, 136),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 4)
            });
            StitcherTestHelpers.VerifyDirectionType(softclipDirections, softclipMerged.CigarDirections.Expand().ToArray());

            // --- Configuration 3: softclipped bases contribute and probe softclips are NOT ignored ---
            // Back to the originally expected directions: one more Stitched position, contributed by
            // the probe softclip.
            var probeSoftclipStitcher = new BasicStitcher(10, useSoftclippedBases: true, ignoreProbeSoftclips: false);
            var probeSoftclipPair     = new AlignmentSet(read1, read2);

            probeSoftclipStitcher.TryStitch(probeSoftclipPair);
            var probeSoftclipMerged = StitcherTestHelpers.GetMergedRead(probeSoftclipPair);

            Assert.Equal("4S137M4S", probeSoftclipMerged.CigarData.ToString());
            var probeSoftclipDirections = StitcherTestHelpers.BuildDirectionMap(new List<IEnumerable<DirectionType>>
            {
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Forward, 4),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Stitched, 137),
                StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 4)
            });
            StitcherTestHelpers.VerifyDirectionType(probeSoftclipDirections, probeSoftclipMerged.CigarDirections.Expand().ToArray());
        }