// Builds a stitched cigar for a read pair from the positions each read aligns to on the reference.
// The approach is deliberately conservative: the two reads' position maps must agree at every index
// of the overlap, otherwise the pair is rejected. It is not the same approach as implemented in the
// amplicon aligner.
// Throws ReadsNotStitchableException when the position maps disagree anywhere in the overlap.
private CigarAlignment CalculateStitchedCigar(Read read1, Read read2, OverlapBoundary overlapBoundary)
{
    // walk the overlap region and require both reads to map each paired index to the same position
    var offset = 0;
    while (offset < overlapBoundary.OverlapLength)
    {
        var read1Position = read1.PositionMap[overlapBoundary.Read1.StartIndex + offset];
        var read2Position = read2.PositionMap[overlapBoundary.Read2.StartIndex + offset];

        if (read1Position != read2Position)
        {
            throw new ReadsNotStitchableException("Disagreement in read position maps");
        }

        offset++;
    }

    // ---------------------------
    // assemble the stitched cigar:
    // read1 contributes its cigar up to the anchor (start of the overlap), read2 contributes everything
    // from the anchor onwards, so read2's operations are the ones used inside the overlap.
    var stitchedCigar = read1.CigarData.GetTrimmed(overlapBoundary.Read1.StartIndex);

    for (var opIndex = 0; opIndex < read2.CigarData.Count; opIndex++)
    {
        var op = read2.CigarData[opIndex];

        // a soft clip that opens read2 is dropped; every other operation is copied across
        var isLeadingSoftClip = opIndex == 0 && op.Type == 'S';
        if (isLeadingSoftClip)
        {
            continue;
        }

        stitchedCigar.Add(new CigarOp(op.Type, op.Length));
    }

    stitchedCigar.Compress();

    // validation against the source cigars is currently disabled:
    //ValidateCigar(stitchedCigar, read1.CigarData, read2.CigarData);

    return stitchedCigar;
}
// Derives the overlap boundary of a read pair from an already known stitched cigar string.
//
// The overlap length is whatever the two reads have "in excess" of the stitched read span:
//   overlap = read1 length + read2 length - stitched read span
// The overlap is assumed to be simple: it occupies the last `overlap` bases of read1 and the first
// `overlap` bases of read2.
//
// In the returned boundary:
//   Read1.StartIndex / Read1.EndIndex  - first and last index of the overlap within read1
//   Read2.StartIndex / Read2.EndIndex  - first and last index of the overlap within read2
//   OverlapLength                      - number of overlapping bases
//   TotalStitchedLength                - read span of the stitched cigar
//
// Throws ReadsNotStitchableException when the computed overlap is zero or negative.
public static OverlapBoundary GetOverlapBoundary(Read read1, Read read2, string stitchedCigar)
{
    var totalStitchedLength = (int)new CigarAlignment(stitchedCigar).GetReadSpan();
    var overlapLength = read1.Sequence.Length + read2.Sequence.Length - totalStitchedLength;

    if (overlapLength <= 0)
    {
        throw new ReadsNotStitchableException(string.Format("No overlap between reads {0} and {1}", read1.ToString(), read2.ToString()));
    }

    // In this case, we'll just assume that the stitching is simple and the overlap reaches exactly
    // as far back into R1 as it does forward into R2.
    var overlapBoundary = new OverlapBoundary()
    {
        OverlapLength = overlapLength,

        // Record the stitched length as the position-based overload does. Without it the property keeps
        // its default value and any length check against it (such as the one in GetStitchedCigar)
        // can never succeed for boundaries produced here.
        TotalStitchedLength = totalStitchedLength,

        Read1 = new ReadIndexBoundary()
        {
            StartIndex = read1.Sequence.Length - overlapLength,
            EndIndex = read1.Sequence.Length - 1
        },
        Read2 = new ReadIndexBoundary()
        {
            StartIndex = 0,
            EndIndex = overlapLength - 1
        }
    };

    return overlapBoundary;
}
// Chooses the stitched cigar for an alignment set.
// When both partner reads carry a stitched cigar (XC tag) and read1's cigar has the read span expected
// by the overlap boundary, that cigar is used as is. In every other case the stitched cigar is
// calculated from the two reads.
private CigarAlignment GetStitchedCigar(AlignmentSet set, OverlapBoundary overlapBoundary)
{
    var bothReadsTagged = set.PartnerRead1.StitchedCigar != null && set.PartnerRead2.StitchedCigar != null;

    if (bothReadsTagged)
    {
        // only trust the tag when it corresponds to the expected stitched length
        var taggedCigar = set.PartnerRead1.StitchedCigar;
        var lengthMatches = taggedCigar.GetReadSpan() == overlapBoundary.TotalStitchedLength;

        if (lengthMatches)
        {
            return taggedCigar;
        }
    }

    return CalculateStitchedCigar(set.PartnerRead1, set.PartnerRead2, overlapBoundary);
}
// Produces the merged (consensus) read for a pair, given the stitched cigar and the overlap boundaries
// determined earlier.
//   - before the overlap: bases, qualities and directions are copied from read1
//   - inside the overlap: each position is resolved from both reads (see the loop below) and marked Stitched
//   - after the overlap:  bases, qualities and directions are copied from read2
// todo try different consensus approaches
protected Read GenerateConsensus(Read read1, Read read2, CigarAlignment stitchedCigar, OverlapBoundary overlapBoundary)
{
    var stitchedLength = (int)stitchedCigar.GetReadSpan();

    // containers for the consensus
    var consensusBases = new StringBuilder();
    var consensusQualities = new byte[stitchedLength];
    var consensusDirections = new DirectionType[stitchedLength];

    // ---- region before the overlap: read1 only
    var read1OverlapStart = overlapBoundary.Read1.StartIndex;

    consensusBases.Append(read1.Sequence.Substring(0, read1OverlapStart));
    Array.Copy(read1.Qualities, consensusQualities, read1OverlapStart);

    for (var i = 0; i < read1OverlapStart; i++)
    {
        consensusDirections[i] = read1.DirectionMap[i];
    }

    // ---- overlap region: pick a consensus base and quality per position
    for (var offset = 0; offset < overlapBoundary.OverlapLength; offset++)
    {
        var index1 = overlapBoundary.Read1.StartIndex + offset;
        var index2 = overlapBoundary.Read2.StartIndex + offset;

        var firstBase = read1.Sequence[index1];
        var secondBase = read2.Sequence[index2];
        var firstQuality = read1.Qualities[index1];
        var secondQuality = read2.Qualities[index2];

        consensusDirections[index1] = DirectionType.Stitched;

        // default outcome (also the outcome when the bases agree): read1's base with the better quality
        var chosenBase = firstBase;
        var chosenQuality = Math.Max(firstQuality, secondQuality);

        if (firstBase != secondBase)
        {
            var bothHighQuality = firstQuality >= _minBaseCallQuality && secondQuality >= _minBaseCallQuality;

            if (bothHighQuality)
            {
                // two high-quality bases that disagree: no call
                chosenBase = 'N';
                chosenQuality = 0;
            }
            else if (firstQuality < secondQuality)
            {
                // take the higher quality base (ties stay with read1)
                chosenBase = secondBase;
            }
        }

        consensusBases.Append(chosenBase);
        consensusQualities[index1] = chosenQuality;
    }

    // ---- region after the overlap: read2 only
    var read2TailStart = overlapBoundary.Read2.EndIndex + 1;
    var stitchedTailStart = overlapBoundary.Read1.EndIndex + 1;

    consensusBases.Append(read2.Sequence.Substring(read2TailStart));
    Array.Copy(read2.Qualities, read2TailStart, consensusQualities, stitchedTailStart, read2.Sequence.Length - read2TailStart);

    // walk the stitched tail and read2's tail in lockstep
    for (int target = stitchedTailStart, source = read2TailStart; target < consensusDirections.Length; target++, source++)
    {
        consensusDirections[target] = read2.DirectionMap[source];
    }

    // ---- wrap everything up as a read
    var chromosome = read1.Chromosome;
    var alignment = new BamAlignment()
    {
        Bases = consensusBases.ToString(),
        Position = read1.Position - 1,
        Qualities = consensusQualities,
        CigarData = stitchedCigar
    };

    var mergedRead = new Read(chromosome, alignment, true)
    {
        DirectionMap = consensusDirections,
        StitchedCigar = stitchedCigar
    };

    return mergedRead;
}
// Determines where read1 and read2 overlap, using the reads' position maps to locate an anchor.
//
// Returns null when there is no overlap: either read1 has no reference-mapped entries in its
// position map, or read2's position lies outside the range of positions covered by read1.
//
// Otherwise returns a boundary with the overlap length, the first/last overlap index within each
// read, and the total length of the stitched read.
//
// Throws Exception when read2 starts within read1's position range but neither anchor
// (read2's start within read1, or read1's maximum position within read2) can be found.
// NOTE(review): sibling methods signal unstitchable pairs with ReadsNotStitchableException; the
// exception type is left unchanged here so callers see the same behavior - confirm whether it
// should be aligned.
public OverlapBoundary GetOverlapBoundary(Read read1, Read read2)
{
    // entries equal to -1 are excluded from the range check
    var read1ReferencePositions = read1.PositionMap.Where(p => p != -1).ToList();

    // Min()/Max() throw InvalidOperationException on an empty sequence; a read with no
    // reference-mapped positions cannot be anchored, so report it as "no overlap" instead.
    if (read1ReferencePositions.Count == 0)
    {
        return null;
    }

    if (read2.Position < read1ReferencePositions.Min() || read2.Position > read1ReferencePositions.Max())
    {
        return null; // no overlap
    }

    var overlapBoundary = new OverlapBoundary();

    // find anchor of read1 on read2
    var read1MaxPosition = read1.PositionMap.Max();
    var indexOfR2StartInR1 = FindPosition(read1.PositionMap, read2.Position);
    var indexOfR1MaxPosInR2 = FindPosition(read2.PositionMap, read1MaxPosition);

    // grab a valid anchor and compute overlap from there
    // don't rely on position map once you have a valid anchor
    if (indexOfR2StartInR1 != -1)
    {
        // anchor: the index in read1 where read2 starts
        var remainingR1Length = read1.ReadLength - (int)read1.CigarData.GetSuffixClip() - indexOfR2StartInR1;
        overlapBoundary.OverlapLength = Math.Min((int)read2.CigarData.GetReadSpanBetweenClippedEnds(), remainingR1Length);

        overlapBoundary.Read1 = new ReadIndexBoundary()
        {
            StartIndex = indexOfR2StartInR1,
            EndIndex = indexOfR2StartInR1 + overlapBoundary.OverlapLength - 1
        };
        overlapBoundary.Read2 = new ReadIndexBoundary()
        {
            StartIndex = (int)read2.CigarData.GetPrefixClip(),
            EndIndex = (int)read2.CigarData.GetPrefixClip() + overlapBoundary.OverlapLength - 1
        };
    }
    else if (indexOfR1MaxPosInR2 != -1)
    {
        // anchor: the index in read2 that holds read1's maximum position
        var precedingR2Length = indexOfR1MaxPosInR2 - (int)read2.CigarData.GetPrefixClip() + 1;
        overlapBoundary.OverlapLength = Math.Min((int)read1.CigarData.GetReadSpanBetweenClippedEnds(), precedingR2Length);

        overlapBoundary.Read1 = new ReadIndexBoundary()
        {
            StartIndex = read1.ReadLength - (int)read1.CigarData.GetSuffixClip() - overlapBoundary.OverlapLength,
            EndIndex = read1.ReadLength - (int)read1.CigarData.GetSuffixClip() - 1
        };
        overlapBoundary.Read2 = new ReadIndexBoundary()
        {
            StartIndex = indexOfR1MaxPosInR2 - overlapBoundary.OverlapLength + 1,
            EndIndex = indexOfR1MaxPosInR2
        };
    }
    else
    {
        throw new Exception("Unable to find anchor between reads");
    }

    // stitched length = read1 bases before the overlap + the overlap + read2 bases after the overlap
    overlapBoundary.TotalStitchedLength = overlapBoundary.Read1.StartIndex + overlapBoundary.OverlapLength +
                                          (read2.ReadLength - overlapBoundary.Read2.EndIndex - 1);

    return overlapBoundary;
}