// TODO I strongly advise we remove this logic altogether. It is brittle, incomplete, and hasn't been shown to reliably improve results.
/// <summary>
/// Builds a placeholder merged read for the two partner reads of <paramref name="set"/>: every base is 'N',
/// every quality is 0, and the read spans from read 1's clip-adjusted start to the furthest
/// insertion-adjusted end of either read.
/// </summary>
/// <param name="set">Alignment set supplying PartnerRead1 and PartnerRead2.</param>
/// <param name="useSoftclippedBases">
/// When true, the stitched region of the direction string starts at read 2's clip-adjusted position;
/// otherwise it starts at read 2's position.
/// </param>
/// <returns>A read whose cigar is [prefix clip]S + M + [suffix clip]S and whose directions are built here.</returns>
/// <exception cref="ArgumentException">
/// Thrown when the prefix clip plus the suffix clip is greater than or equal to the merged length.
/// </exception>
public Read GenerateNifiedMergedRead(AlignmentSet set, bool useSoftclippedBases)
{
    var firstRead = set.PartnerRead1;
    var secondRead = set.PartnerRead2;

    // Right-most coordinate of each read, extended by any insertion at the end of its cigar.
    var firstEnd = firstRead.ClipAdjustedEndPosition + firstRead.CigarData.GetSuffixInsertionLength();
    var secondEnd = secondRead.ClipAdjustedEndPosition + secondRead.CigarData.GetSuffixInsertionLength();

    var firstReachesFurther = secondEnd < firstEnd;
    var rightmostEnd = firstReachesFurther ? firstEnd : secondEnd;

    // The merged read is measured from read 1's clip-adjusted start.
    var mergedLength = rightmostEnd + 1 - firstRead.ClipAdjustedPosition;

    var leadingClip = firstRead.CigarData.GetPrefixClip();

    // The suffix clip comes from whichever read reaches further right...
    var trailingClip = firstReachesFurther
        ? firstRead.CigarData.GetSuffixClip()
        : secondRead.CigarData.GetSuffixClip();

    // ...unless both end at the same place, in which case the smaller of the two clips is used.
    if (firstEnd == secondEnd)
    {
        trailingClip = Math.Min(firstRead.CigarData.GetSuffixClip(), secondRead.CigarData.GetSuffixClip());
    }

    if (leadingClip + trailingClip >= mergedLength)
    {
        throw new ArgumentException($"Reads cannnot be Nified using this simple algorithm. The prefix and suffix sofctlips overlap for reads: {firstRead.Name} {firstRead.Position}:{firstRead.CigarData} and {secondRead.Position}:{secondRead.CigarData}");
    }

    // Cigar: optional leading softclip, one match block, optional trailing softclip.
    var cigarText = "";
    if (leadingClip > 0)
    {
        cigarText += $"{leadingClip}S";
    }
    cigarText += $"{mergedLength - leadingClip - trailingClip}M";
    if (trailingClip > 0)
    {
        cigarText += $"{trailingClip}S";
    }
    var mergedCigar = new CigarAlignment(cigarText);

    // Number of bases before the stitched region (counted from read 1's clip-adjusted start).
    var secondStart = useSoftclippedBases ? secondRead.ClipAdjustedPosition : secondRead.Position;
    var basesBeforeOverlap = secondStart - firstRead.ClipAdjustedPosition;

    // Number of bases after the stitched region: distance between the two ends.
    var basesAfterOverlap = rightmostEnd - (firstReachesFurther ? secondEnd : firstEnd);

    var firstIsForward = firstRead.SequencedBaseDirectionMap.First() == DirectionType.Forward;
    var leadingDirection = firstIsForward ? "F" : "R";

    // The tail carries read 1's direction when read 1 reaches further, otherwise the opposite direction.
    var trailingDirection = firstReachesFurther == firstIsForward ? "F" : "R";

    // Directions: optional leading run, one stitched ("S") run, optional trailing run.
    var directionText = "";
    if (basesBeforeOverlap > 0)
    {
        directionText += $"{basesBeforeOverlap}{leadingDirection}";
    }
    directionText += $"{mergedLength - basesBeforeOverlap - basesAfterOverlap}S";
    if (basesAfterOverlap > 0)
    {
        directionText += $"{basesAfterOverlap}{trailingDirection}";
    }

    var placeholderAlignment = new BamAlignment
    {
        Name = firstRead.Name,
        Bases = new string('N', mergedLength),
        Position = Math.Min(firstRead.Position - 1, secondRead.Position - 1),
        // A freshly allocated byte array is all zeros, i.e. quality 0 for every base.
        Qualities = new byte[mergedLength],
        CigarData = mergedCigar
    };

    return new Read(firstRead.Chromosome, placeholderAlignment)
    {
        StitchedCigar = mergedCigar,
        CigarDirections = new CigarDirection(directionText)
    };
}
/// <summary>
/// Exercises TryStitch on read pairs that are expected to stay unstitched: a missing partner,
/// distant reads, bordering reads, reads on different chromosomes, and overlapping reads with
/// incompatible cigars. Each unstitchable pair must leave both original reads in ReadsForProcessing.
/// </summary>
public void TryStitch_NoXC_Unstitchable()
{
    var anchorRead = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12345, new CigarAlignment("8M"), qualityForAll: 30);
    var farAwayMate = DomainTestHelper.CreateRead("chr1", "A", 2384, new CigarAlignment("1M"), qualityForAll: 30);
    var incompatibleMate = DomainTestHelper.CreateRead("chr1", "ATCGTT", 12349, new CigarAlignment("1I5M"), qualityForAll: 30);
    var otherChromosomeMate = DomainTestHelper.CreateRead("chr2", "ATCGTT", 12349, new CigarAlignment("6M"), qualityForAll: 30);
    var borderingMate = DomainTestHelper.CreateRead("chr1", "AT", 12343, new CigarAlignment("2M"), qualityForAll: 30);

    var stitcher = StitcherTestHelpers.GetStitcher(10);

    // -----------------------------------------------
    // Either of the partner reads is missing*
    //  *(only read that could be missing is read 2; if read 1 was missing we couldn't create the alignment set)
    // -----------------------------------------------
    // Should throw an exception
    var missingMateSet = new AlignmentSet(anchorRead, null);
    Assert.Throws<ArgumentException>(() => stitcher.TryStitch(missingMateSet));

    // -----------------------------------------------
    // No overlap, reads are far away
    // -----------------------------------------------
    // Shouldn't stitch
    var farAwaySet = new AlignmentSet(anchorRead, farAwayMate);
    stitcher.TryStitch(farAwaySet);
    Assert.Equal(2, farAwaySet.ReadsForProcessing.Count);
    StitcherTestHelpers.TestUnstitchableReads(anchorRead, farAwayMate, 0, leftovers =>
    {
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(farAwayMate, r)));
    });

    // -----------------------------------------------
    // No overlap, reads are directly neighboring
    // -----------------------------------------------
    // Shouldn't stitch
    var borderingSet = new AlignmentSet(anchorRead, borderingMate);
    stitcher.TryStitch(borderingSet);
    Assert.Equal(2, borderingSet.ReadsForProcessing.Count);
    StitcherTestHelpers.TestUnstitchableReads(anchorRead, borderingMate, 0, leftovers =>
    {
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(borderingMate, r)));
    });

    // -----------------------------------------------
    // No overlap, reads on diff chromosomes
    // -----------------------------------------------
    // Shouldn't stitch
    var otherChromosomeSet = new AlignmentSet(anchorRead, otherChromosomeMate);
    stitcher.TryStitch(otherChromosomeSet);
    Assert.Equal(2, otherChromosomeSet.ReadsForProcessing.Count);
    StitcherTestHelpers.TestUnstitchableReads(anchorRead, otherChromosomeMate, 0, leftovers =>
    {
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(otherChromosomeMate, r)));
    });

    // -----------------------------------------------
    // Has overlap, but cigars are incompatible
    // -----------------------------------------------
    // Shouldn't stitch
    var incompatibleSet = new AlignmentSet(anchorRead, incompatibleMate);
    stitcher.TryStitch(incompatibleSet);
    Assert.Equal(2, incompatibleSet.ReadsForProcessing.Count);
    StitcherTestHelpers.TestUnstitchableReads(anchorRead, incompatibleMate, 0, leftovers =>
    {
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(anchorRead, r)));
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(incompatibleMate, r)));
    });

    // -----------------------------------------------
    // Has overlap, but cigars are incompatible, but read 2 starts with SC
    // -----------------------------------------------
    // Overlap is just S and I - should stitch
    // 5678----90123456789
    // MMMMIIII
    //     SSSSMMMM
    var insertionRead = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12345, new CigarAlignment("4M4I"), qualityForAll: 30);
    var softclipOverInsertionMate = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12349, new CigarAlignment("4S4M"), qualityForAll: 30);

    // NOTE(review): the set is still constructed, but the stitch and its assertions are disabled below;
    // the reason is not visible from this method.
    var softclipOverInsertionSet = new AlignmentSet(insertionRead, softclipOverInsertionMate);
    //stitcher.TryStitch(alignmentSet);
    //Assert.Equal(1, alignmentSet.ReadsForProcessing.Count);
    //Assert.Equal("4M4I4M", alignmentSet.ReadsForProcessing.First().CigarData.ToString());

    // Overlap is S and some disagreeing ops with I - should not stitch
    var disagreeingMate = DomainTestHelper.CreateRead("chr1", "ATCGATCG", 12348, new CigarAlignment("2S1D6M"), qualityForAll: 30);
    var disagreeingSet = new AlignmentSet(insertionRead, disagreeingMate);
    stitcher.TryStitch(disagreeingSet);
    Assert.Equal(2, disagreeingSet.ReadsForProcessing.Count);
    StitcherTestHelpers.TestUnstitchableReads(insertionRead, disagreeingMate, 0, leftovers =>
    {
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(insertionRead, r)));
        Assert.Equal(1, leftovers.Count(r => StitcherTestHelpers.VerifyReadsEqual(disagreeingMate, r)));
    });
}
/// <summary>
/// Stitches two overlapping reads with disagreeing (or, in one scenario, agreeing) bases under a range
/// of base-quality combinations and checks the merged sequence and qualities.
/// </summary>
/// <param name="nifyDisagreements">
/// Passed through to the stitcher. When true, the disagreement scenarios expect "N" bases with quality 0
/// across the overlap; when false, they expect the bases and qualities listed for each scenario.
/// </param>
private void ExecuteConsensusTests(bool nifyDisagreements)
{
    // 1234...   1 - - 2 3 4 5 6 - - 7 8 9 0   //Reference Positions
    // Read1     X X X X X X X X - - - - -
    // Read1     M I I M M M M M - - - - -
    // Read1     T T T T T T T T - - - - -
    // Read2     - - - X X X X X X X X - -
    // Read2     - - - M M M M M I M M - -
    // Read2     - - - A A A A A A A A - -
    const int read1Quality = 30;
    const int read2Quality = 20;

    // Expected overlap and full sequence whenever disagreements are nified.
    const string nifiedOverlap = "NNNNN";
    const string nifiedSequence = "TTTNNNNNAAA";

    var read1 = DomainTestHelper.CreateRead("chr1", "TTTTTTTT", 12341, new CigarAlignment("1M2I5M"), qualityForAll: (byte)read1Quality);
    var read2 = DomainTestHelper.CreateRead("chr1", "AAAAAAAA", 12342, new CigarAlignment("5M1I2M"), qualityForAll: (byte)read2Quality);

    var stitcher = StitcherTestHelpers.GetStitcher(10, false, nifyDisagreements: nifyDisagreements);

    var alignmentSet = new AlignmentSet(read1, read2);
    stitcher.TryStitch(alignmentSet);

    // Merged    A T C ? ? ? ? ? T C G - -
    // Merged    M I I M M M M M I M M - -
    // Merged    0 1 2 3 4 5 6 7 8 9 0 1 2
    var overlapStart = 3;
    var overlapEnd = 8;
    var overlapLength = 5;

    var mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    //Consensus sequence should have everything from read1 for positions before overlap
    Assert.Equal("TTT", mergedRead.Sequence.Substring(0, overlapStart));

    //Consensus sequence should have everything from read2 for positions after overlap
    Assert.Equal("AAA", mergedRead.Sequence.Substring(overlapEnd, 3));

    //Consensus sequence should have an N where we have two high-quality (both above min) disagreeing bases
    var expectedOverlap = nifyDisagreements ? nifiedOverlap : "TTTTT";
    Assert.Equal(expectedOverlap, mergedRead.Sequence.Substring(overlapStart, 5));

    //Consensus sequence should have 0 quality where we have two high-quality (both above min) disagreeing bases
    var qualitiesBeforeOverlap = mergedRead.Qualities.Take(overlapStart);
    var qualitiesInOverlap = mergedRead.Qualities.Skip(overlapStart).Take(overlapLength);
    var qualitiesAfterOverlap = mergedRead.Qualities.Skip(overlapEnd).Take(mergedRead.Sequence.Length - overlapEnd);
    Assert.True(qualitiesBeforeOverlap.All(q => q == read1Quality));
    Assert.True(qualitiesInOverlap.All(q => q == 0));
    Assert.True(qualitiesAfterOverlap.All(q => q == read2Quality));

    //Consensus sequence should take higher quality base if one or more of the bases is below min quality

    //Read 2 trumps whole overlap
    read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 5, 5, 5, 5 };
    read2.BamAlignment.Qualities = new byte[] { 40, 40, 40, 40, 40, 20, 19, 18 };
    alignmentSet = new AlignmentSet(read1, read2);
    stitcher.TryStitch(alignmentSet);
    mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    expectedOverlap = nifyDisagreements ? nifiedOverlap : read2.Sequence.Substring(0, 5);
    Assert.Equal(expectedOverlap, mergedRead.Sequence.Substring(overlapStart, 5));
    var expectedSequence = nifyDisagreements ? nifiedSequence : "TTTAAAAAAAA";
    Assert.Equal(expectedSequence, mergedRead.Sequence);
    var expectedQualities = nifyDisagreements
        ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
        : new byte[] { 30, 30, 30, 40, 40, 40, 40, 40, 20, 19, 18 };
    StitcherTestHelpers.CompareQuality(expectedQualities, mergedRead.Qualities);

    //Read 1 trumps whole overlap
    read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 40, 40, 40, 40, 40 };
    read2.BamAlignment.Qualities = new byte[] { 5, 5, 5, 5, 5, 20, 19, 18 };
    alignmentSet = new AlignmentSet(read1, read2);
    stitcher.TryStitch(alignmentSet);
    mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    expectedOverlap = nifyDisagreements ? nifiedOverlap : read1.Sequence.Substring(3, 5);
    Assert.Equal(expectedOverlap, mergedRead.Sequence.Substring(overlapStart, 5));
    expectedSequence = nifyDisagreements ? nifiedSequence : "TTTTTTTTAAA";
    Assert.Equal(expectedSequence, mergedRead.Sequence);
    expectedQualities = nifyDisagreements
        ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
        : new byte[] { 30, 30, 30, 40, 40, 40, 40, 40, 20, 19, 18 };
    StitcherTestHelpers.CompareQuality(expectedQualities, mergedRead.Qualities);

    //Little bit of each
    read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 45, 5, 45, 5 };
    read2.BamAlignment.Qualities = new byte[] { 40, 5, 40, 5, 40, 20, 19, 18 };
    alignmentSet = new AlignmentSet(read1, read2);
    stitcher.TryStitch(alignmentSet);
    mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    expectedSequence = nifyDisagreements ? nifiedSequence : "TTTATATAAAA";
    Assert.Equal(expectedSequence, mergedRead.Sequence);
    expectedQualities = nifyDisagreements
        ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
        : new byte[] { 30, 30, 30, 40, 45, 40, 45, 40, 20, 19, 18 };
    StitcherTestHelpers.CompareQuality(expectedQualities, mergedRead.Qualities);

    //Consensus sequence should take base and assign the higher quality if both bases agree
    var read2_agreeingBases = DomainTestHelper.CreateRead("chr1", "TTTTTTTT", 12342, new CigarAlignment("5M1I2M"), new byte[] { 40, 5, 40, 5, 40, 20, 19, 18 });
    read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 45, 5, 45, 5 };
    alignmentSet = new AlignmentSet(read1, read2_agreeingBases);
    stitcher.TryStitch(alignmentSet);
    mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    // Expected values here do not depend on nifyDisagreements.
    Assert.Equal("TTTTTTTTTTT", mergedRead.Sequence);
    StitcherTestHelpers.CompareQuality(new byte[] { 30, 30, 30, 45, 50, 45, 50, 45, 20, 19, 18 }, mergedRead.Qualities);

    //Bases disagree and both are below minimum quality, read1>read2 : take base/q from read1
    read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 8, 8, 8, 8, 8 };
    read2.BamAlignment.Qualities = new byte[] { 5, 5, 5, 5, 5, 20, 19, 18 };
    alignmentSet = new AlignmentSet(read1, read2);
    stitcher.TryStitch(alignmentSet);
    mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    expectedOverlap = nifyDisagreements ? nifiedOverlap : read1.Sequence.Substring(3, 5);
    Assert.Equal(expectedOverlap, mergedRead.Sequence.Substring(overlapStart, 5));
    expectedSequence = nifyDisagreements ? nifiedSequence : "TTTTTTTTAAA";
    Assert.Equal(expectedSequence, mergedRead.Sequence);
    expectedQualities = nifyDisagreements
        ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
        : new byte[] { 30, 30, 30, 8, 8, 8, 8, 8, 20, 19, 18 };
    StitcherTestHelpers.CompareQuality(expectedQualities, mergedRead.Qualities);

    //Bases disagree and both are below minimum quality, read2>read1 : take base/q from read2
    read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 5, 5, 5, 5 };
    read2.BamAlignment.Qualities = new byte[] { 8, 8, 8, 8, 8, 20, 19, 18 };
    alignmentSet = new AlignmentSet(read1, read2);
    stitcher.TryStitch(alignmentSet);
    mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    expectedOverlap = nifyDisagreements ? nifiedOverlap : read2.Sequence.Substring(0, 5);
    Assert.Equal(expectedOverlap, mergedRead.Sequence.Substring(overlapStart, 5));
    expectedSequence = nifyDisagreements ? nifiedSequence : "TTTAAAAAAAA";
    Assert.Equal(expectedSequence, mergedRead.Sequence);
    expectedQualities = nifyDisagreements
        ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
        : new byte[] { 30, 30, 30, 8, 8, 8, 8, 8, 20, 19, 18 };
    StitcherTestHelpers.CompareQuality(expectedQualities, mergedRead.Qualities);

    //Bases disagree and both are below minimum quality, read1==read2 : take base/q from read1
    read1.BamAlignment.Qualities = new byte[] { 30, 30, 30, 5, 5, 5, 5, 5 };
    read2.BamAlignment.Qualities = new byte[] { 5, 5, 5, 5, 5, 20, 19, 18 };
    alignmentSet = new AlignmentSet(read1, read2);
    stitcher.TryStitch(alignmentSet);
    mergedRead = StitcherTestHelpers.GetMergedRead(alignmentSet);

    expectedOverlap = nifyDisagreements ? nifiedOverlap : read1.Sequence.Substring(3, 5);
    Assert.Equal(expectedOverlap, mergedRead.Sequence.Substring(overlapStart, 5));
    expectedSequence = nifyDisagreements ? nifiedSequence : "TTTTTTTTAAA";
    Assert.Equal(expectedSequence, mergedRead.Sequence);
    expectedQualities = nifyDisagreements
        ? new byte[] { 30, 30, 30, 0, 0, 0, 0, 0, 20, 19, 18 }
        : new byte[] { 30, 30, 30, 5, 5, 5, 5, 5, 20, 19, 18 };
    StitcherTestHelpers.CompareQuality(expectedQualities, mergedRead.Qualities);
}
/// <summary>
/// Regression test built from a real ReCo read pair that used to produce the wrong stitched cigar.
/// Stitches the same pair with three BasicStitcher configurations and checks the merged cigar
/// ("4S137M4S" each time) and the expanded stitched directions.
/// </summary>
public void TryStitch_ReCo()
{
    // Real example from ReCo, was failing to generate the correct stitched cigar
    var read1Bases = "GTACTCCTACAGTCCCACCCCTCCCCTATAAACCTTATGAATCCCCGTTCACTTAGATGCCAGCTTGGCAAGGAAGGGAAGTACACATCTGTTGACAGTAATGAAATATCCTTGATAAGGATTTAAATTTTGGATGTGCTG";
    var read2Bases = "ACCTACAGTCCCACCCCTCCCCTATAAACCTTAGGAATCCCCGTTCACTTAGATGCCAGCTTGGCAAGGAAGGGAAGTACACATCTGTTGACAGTAATGAAATATCCTTGATAAGGATTTAAATTTTGGATGTGCTGAGCT";

    // 8             9
    // 3 4 5 6 7 8 9 0 1 2
    // s s s s s M M M M M ...
    // - - - - M M M M M M ...
    // F F F F R S S S S S ... // Stitched directions if we don't allow softclip to contribute
    // F F F F S S S S S S ... // Stitched directions if we do allow softclip to contribute
    var read1 = DomainTestHelper.CreateRead("chr21", read1Bases, 16685488, new CigarAlignment("5S136M"));
    var read2 = DomainTestHelper.CreateRead("chr21", read2Bases, 16685487, new CigarAlignment("137M4S"));
    StitcherTestHelpers.SetReadDirections(read2, DirectionType.Reverse);

    // Without allowing softclips to count to support, should still get a M at an M/S overlap, but it won't be stitched.
    var noSoftclipStitcher = new BasicStitcher(10, useSoftclippedBases: false);
    var noSoftclipSet = new AlignmentSet(read1, read2);
    noSoftclipStitcher.TryStitch(noSoftclipSet);

    var noSoftclipMerged = StitcherTestHelpers.GetMergedRead(noSoftclipSet);
    Assert.Equal("4S137M4S", noSoftclipMerged.CigarData.ToString());

    var noSoftclipDirections = StitcherTestHelpers.BuildDirectionMap(new List<IEnumerable<DirectionType>>
    {
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Forward, 4),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 1),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Stitched, 136),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 4)
    });
    StitcherTestHelpers.VerifyDirectionType(noSoftclipDirections, noSoftclipMerged.CigarDirections.Expand().ToArray());

    // Softclipped bases enabled with the constructor's remaining defaults: the expected directions
    // asserted here are the same as in the first configuration.
    var softclipStitcher = new BasicStitcher(10, useSoftclippedBases: true);
    var softclipSet = new AlignmentSet(read1, read2);
    softclipStitcher.TryStitch(softclipSet);

    var softclipMerged = StitcherTestHelpers.GetMergedRead(softclipSet);
    Assert.Equal("4S137M4S", softclipMerged.CigarData.ToString());

    var softclipDirections = StitcherTestHelpers.BuildDirectionMap(new List<IEnumerable<DirectionType>>
    {
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Forward, 4),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 1),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Stitched, 136),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 4)
    });
    StitcherTestHelpers.VerifyDirectionType(softclipDirections, softclipMerged.CigarDirections.Expand().ToArray());

    // If we're not ignoring probe softclips, go back to the original expected directions (1 more stitched from probe)
    var probeSoftclipStitcher = new BasicStitcher(10, useSoftclippedBases: true, ignoreProbeSoftclips: false);
    var probeSoftclipSet = new AlignmentSet(read1, read2);
    probeSoftclipStitcher.TryStitch(probeSoftclipSet);

    var probeSoftclipMerged = StitcherTestHelpers.GetMergedRead(probeSoftclipSet);
    Assert.Equal("4S137M4S", probeSoftclipMerged.CigarData.ToString());

    var probeSoftclipDirections = StitcherTestHelpers.BuildDirectionMap(new List<IEnumerable<DirectionType>>
    {
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Forward, 4),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Stitched, 137),
        StitcherTestHelpers.BuildDirectionSegment(DirectionType.Reverse, 4)
    });
    StitcherTestHelpers.VerifyDirectionType(probeSoftclipDirections, probeSoftclipMerged.CigarDirections.Expand().ToArray());
}