/// <summary>
/// Forwards <paramref name="status"/> to the status counter, but only when the
/// <c>_debug</c> flag is set; otherwise does nothing.
/// </summary>
/// <param name="status">Status label passed through unchanged to <c>_statusCounter.AddDebugStatusCount</c>.</param>
private void AddDebugStatusCount(string status)
{
    if (!_debug)
    {
        return;
    }

    _statusCounter.AddDebugStatusCount(status);
}
/// <summary>
/// Walks the stitched positions in order and, for the unmapped prefix and the mapped site of each
/// position, pairs up the R1 and R2 cigar operations, combines each pair via <c>GetCombinedOp</c>,
/// and records the combined op together with a one-base direction entry (forward, reverse or stitched).
/// The resulting cigar and direction lists are compressed before being returned.
/// </summary>
/// <param name="positions">Positions to reconcile. Passed to <c>RedistributeSoftclips</c> twice (with true, then false) before being walked.</param>
/// <param name="r1IsReverse">True when read 1 is the reverse read; read 2 is then labelled forward, and vice versa.</param>
/// <param name="success">True when a <see cref="StitchingInfo"/> is returned; false on every path that returns null.</param>
/// <param name="prefixProbeClipEnd">Read index below which a softclip base is considered a prefix probe softclip. Only consulted when <c>_ignoreProbeSoftclips</c> is set.</param>
/// <param name="suffixProbeClipStart">Read index at or above which a softclip base is considered a suffix probe softclip. Only consulted when <c>_ignoreProbeSoftclips</c> is set.</param>
/// <param name="pairIsOutie">Selects which probe boundary (prefix or suffix) each read's softclips are compared against.</param>
/// <param name="leftAlignUnmapped">Not read anywhere in this method body.</param>
/// <returns>
/// The populated stitching info, or null when an insertion conflict is detected, when a pair of
/// operations cannot be combined, or when the compressed cigar contains an internal softclip.
/// </returns>
/// <exception cref="Exception">Both reads were excluded as probe softclips at the same position.</exception>
public StitchingInfo ReconcileSites(List<StitchedPosition> positions, bool r1IsReverse, out bool success, int prefixProbeClipEnd, int suffixProbeClipStart, bool pairIsOutie, bool leftAlignUnmapped = true)
{
    var stitchingInfo = new StitchingInfo();
    success = true;

    // Assumption is that exactly one read is forward and one read is reverse, and each component read is only one direction
    var r1DirectionType = r1IsReverse ? DirectionType.Reverse : DirectionType.Forward;
    var r2DirectionType = r1IsReverse ? DirectionType.Forward : DirectionType.Reverse;

    RedistributeSoftclips(positions, true);
    RedistributeSoftclips(positions, false);

    // Running indexes of the last read-spanning base consumed from each read (-1 = none yet).
    var indexInR1 = -1;
    var indexInR2 = -1;

    for (var i = 0; i < positions.Count; i++)
    {
        StitchedPosition positionBefore = null;
        if (i > 0)
        {
            positionBefore = positions[i - 1];
        }

        var stitchPosition = positions[i];

        // R1 carries an insertion in the unmapped prefix that R2 has no ops for, yet R2 has
        // reference-spanning ops both at this mapped site and at the previous one: give up.
        if (stitchPosition.UnmappedPrefix.R1HasInsertion() && !stitchPosition.UnmappedPrefix.R2Ops.Any())
        {
            if (stitchPosition.MappedSite?.R2Ops.Any(x => x.IsReferenceSpan()) == true
                && positionBefore?.MappedSite?.R2Ops.Any(x => x.IsReferenceSpan()) == true)
            {
                success = false;
                return null;
            }
        }

        // Mirror image of the check above, with the roles of R1 and R2 swapped.
        if (stitchPosition.UnmappedPrefix.R2HasInsertion() && !stitchPosition.UnmappedPrefix.R1Ops.Any())
        {
            if (stitchPosition.MappedSite?.R1Ops.Any(x => x.IsReferenceSpan()) == true
                && positionBefore?.MappedSite?.R1Ops.Any(x => x.IsReferenceSpan()) == true)
            {
                success = false;
                return null;
            }
        }

        foreach (var stitchSite in new StitchedSite[] { stitchPosition.UnmappedPrefix, stitchPosition.MappedSite })
        {
            // Only an unmapped stretch flagged as a prefix is right-aligned; everything else is left-aligned.
            var unmappedSite = stitchSite as UnmappedStretch;
            var rightAlign = unmappedSite != null && unmappedSite.IsPrefix;

            var r1Count = stitchSite.R1Ops.Count;
            var r2Count = stitchSite.R2Ops.Count;
            var offset = Math.Abs(r1Count - r2Count);
            var r1StretchLonger = r1Count > r2Count;
            var stretchLength = Math.Max(r1Count, r2Count);

            for (var j = 0; j < stretchLength; j++)
            {
                int r1StretchIndex;
                int r2StretchIndex;

                if (rightAlign)
                {
                    // Shift the shorter stretch right by the length difference so the ends line up;
                    // its index is negative (no op) until j reaches the offset.
                    r1StretchIndex = r1StretchLonger ? j : j - offset;
                    r2StretchIndex = r1StretchLonger ? j - offset : j;
                }
                else
                {
                    r1StretchIndex = j;
                    r2StretchIndex = j;
                }

                var r1Op = r1StretchIndex >= 0 && r1Count > r1StretchIndex ? stitchSite.R1Ops[r1StretchIndex] : null;
                var r2Op = r2StretchIndex >= 0 && r2Count > r2StretchIndex ? stitchSite.R2Ops[r2StretchIndex] : null;

                var combinedOp = GetCombinedOp(r1Op, r2Op);
                if (combinedOp == null)
                {
                    success = false;
                    if (_debug)
                    {
                        Logger.WriteToLog(string.Format("Could not stitch operations {0} and {1}.", r1Op?.Type, r2Op?.Type));
                    }

                    _statusCounter.AddDebugStatusCount("Could not stitch operations");
                    return null;
                }

                stitchingInfo.StitchedCigar.Add(combinedOp);

                var r1opUsed = r1Op != null;
                var r2opUsed = r2Op != null;

                // When softclipped bases are not used, a softclip op that was combined into a
                // non-softclip op does not contribute its read to this position.
                if (combinedOp.Type != 'S')
                {
                    if (!_useSoftclippedBases && r2Op?.Type == 'S')
                    {
                        r2opUsed = false;
                    }

                    if (!_useSoftclippedBases && r1Op?.Type == 'S')
                    {
                        r1opUsed = false;
                    }
                }

                if (r1opUsed && r1Op.IsReadSpan())
                {
                    indexInR1++;
                }

                if (r2opUsed && r2Op.IsReadSpan())
                {
                    indexInR2++;
                }

                if (_ignoreProbeSoftclips)
                {
                    if (r1opUsed && r1Op.Type == 'S')
                    {
                        var isProbeSoftclip = (pairIsOutie && indexInR1 >= suffixProbeClipStart)
                                              || (!pairIsOutie && indexInR1 < prefixProbeClipEnd);

                        // If this is a probe softclip, don't stitch it
                        if (isProbeSoftclip && r2opUsed)
                        {
                            r1opUsed = false;
                            if (pairIsOutie)
                            {
                                stitchingInfo.IgnoredProbeSuffixBases++;
                            }
                            else
                            {
                                stitchingInfo.IgnoredProbePrefixBases++;
                            }
                        }
                    }

                    if (r2opUsed && r2Op.Type == 'S')
                    {
                        var isProbeSoftclip = (pairIsOutie && indexInR2 < prefixProbeClipEnd)
                                              || (!pairIsOutie && indexInR2 >= suffixProbeClipStart);

                        if (isProbeSoftclip && r1opUsed)
                        {
                            r2opUsed = false;
                            if (pairIsOutie)
                            {
                                stitchingInfo.IgnoredProbePrefixBases++;
                            }
                            else
                            {
                                stitchingInfo.IgnoredProbeSuffixBases++;
                            }
                        }
                    }

                    // TODO support scenarios where R1 and R2 are both in probe softclips, if necessary. Otherwise, if this is really never going to happen, throw an exception if we see it.
                    if (!r1opUsed && !r2opUsed)
                    {
                        throw new Exception("Stitching exception: Both R1 and R2 are in probe softclip regions at overlapping position.");
                    }
                }

                var stitched = r1opUsed && r2opUsed;
                stitchingInfo.StitchedDirections.Directions.Add(new DirectionOp()
                {
                    Direction = stitched ? DirectionType.Stitched : (r1opUsed ? r1DirectionType : r2DirectionType),
                    Length = 1
                });
            }
        }
    }

    stitchingInfo.StitchedCigar.Compress();
    stitchingInfo.StitchedDirections.Compress();

    // Don't allow stitching that creates internal softclip
    if (stitchingInfo.StitchedCigar.HasInternalSoftclip())
    {
        success = false;
        return null;
    }

    return stitchingInfo;
}
/// <summary>
/// Builds a single merged read from <paramref name="read1"/> and <paramref name="read2"/> by walking the
/// expanded stitched cigar alongside the expanded direction list and, for every non-deletion position,
/// taking the base/quality from the forward read, the reverse read, or a consensus of both.
/// </summary>
/// <param name="read1">First read of the pair; also supplies the name and chromosome of the merged read.</param>
/// <param name="read2">Second read of the pair.</param>
/// <param name="stitchingInfo">Stitched cigar and per-position directions driving the merge.</param>
/// <param name="isOutie">When <c>_ignoreProbeSoftclips</c> is set, selects which read's start index is advanced by the ignored probe prefix bases (read 2 if true, read 1 otherwise).</param>
/// <returns>The merged read, or null when the stitched cigar's read span does not match the number of bases produced.</returns>
/// <exception cref="Exception">A forward-only or reverse-only position has no base available in the corresponding read.</exception>
/// <exception cref="ArgumentOutOfRangeException">A direction other than forward, reverse or stitched was encountered.</exception>
public Read GenerateConsensusRead(Read read1, Read read2, StitchingInfo stitchingInfo, bool isOutie)
{
    var stitchedBases = new List<char>();
    var stitchedQualities = new List<byte>();

    var expandedDirections = stitchingInfo.StitchedDirections.Expand();
    var expandedCigar = stitchingInfo.StitchedCigar.Expand();

    var startIndexInR1 = 0;
    var startIndexInR2 = 0;

    if (!_useSoftclippedBases)
    {
        // If we're not using softclipped bases to count toward stitching-ness, we need to fast-forward ahead of the softclip bases in R2.
        // That way, we essentially ignore the R2 softclipped bases and start from the "real" calls once we get to the Reverse-only region.
        startIndexInR2 += (int)read2.CigarData.GetPrefixClip();
    }

    if (_ignoreProbeSoftclips)
    {
        if (isOutie)
        {
            startIndexInR2 += stitchingInfo.IgnoredProbePrefixBases;
        }
        else
        {
            startIndexInR1 += stitchingInfo.IgnoredProbePrefixBases;
        }
    }

    var r1Indexer = new ReadIndexer(startIndexInR1);
    var r2Indexer = new ReadIndexer(startIndexInR2);
    ReadIndexer forwardReadIndexer;
    ReadIndexer reverseReadIndexer;

    // Assumption is that exactly one read is forward and one read is reverse, and each component read is only one direction
    if (read1.SequencedBaseDirectionMap.First() == DirectionType.Forward)
    {
        forwardReadIndexer = r1Indexer;
        reverseReadIndexer = r2Indexer;
    }
    else
    {
        forwardReadIndexer = r2Indexer;
        reverseReadIndexer = r1Indexer;
    }

    for (var i = 0; i < expandedCigar.Count; i++)
    {
        var cigarOp = expandedCigar[i];
        var direction = expandedDirections[i];

        // Deletions contribute no base to the merged read.
        if (cigarOp.Type == 'D')
        {
            continue;
        }

        var r1Index = r1Indexer.Index;
        var r2Index = r2Indexer.Index;

        // Load the current base/quality of each read, or null when its index is outside the read.
        if (r1Index >= 0 && r1Index < read1.BamAlignment.Bases.Length)
        {
            r1Indexer.BaseAtIndex = read1.BamAlignment.Bases[r1Index];
            r1Indexer.QualityAtIndex = read1.Qualities[r1Index];
        }
        else
        {
            r1Indexer.BaseAtIndex = null;
            r1Indexer.QualityAtIndex = null;
        }

        if (r2Index >= 0 && r2Index < read2.BamAlignment.Bases.Length)
        {
            r2Indexer.BaseAtIndex = read2.BamAlignment.Bases[r2Index];
            r2Indexer.QualityAtIndex = read2.Qualities[r2Index];
        }
        else
        {
            r2Indexer.BaseAtIndex = null;
            r2Indexer.QualityAtIndex = null;
        }

        switch (direction)
        {
            case DirectionType.Forward:
                if (forwardReadIndexer.BaseAtIndex == null)
                {
                    throw new Exception("Forward base at index " + forwardReadIndexer.Index + " is null.");
                }

                stitchedBases.Add((char)forwardReadIndexer.BaseAtIndex);
                stitchedQualities.Add((byte)forwardReadIndexer.QualityAtIndex);
                forwardReadIndexer.Increment();
                break;

            case DirectionType.Reverse:
                if (reverseReadIndexer.BaseAtIndex == null)
                {
                    throw new Exception("Reverse base at index " + reverseReadIndexer.Index + " is null.");
                }

                stitchedBases.Add((char)reverseReadIndexer.BaseAtIndex);
                stitchedQualities.Add((byte)reverseReadIndexer.QualityAtIndex);
                reverseReadIndexer.Increment();
                break;

            case DirectionType.Stitched:
                // When either read has no base here nothing is emitted for this position,
                // but both indexers still advance below.
                if (forwardReadIndexer.BaseAtIndex != null && reverseReadIndexer.BaseAtIndex != null)
                {
                    if (forwardReadIndexer.BaseAtIndex == reverseReadIndexer.BaseAtIndex)
                    {
                        stitchedBases.Add((char)forwardReadIndexer.BaseAtIndex);

                        // Agreement: qualities are summed. The sum is computed as int and saturated at
                        // byte.MaxValue, because a bare (byte) cast would wrap around for sums above 255.
                        var summedQuality = Convert.ToInt32((byte)forwardReadIndexer.QualityAtIndex)
                                            + Convert.ToInt32((byte)reverseReadIndexer.QualityAtIndex);
                        stitchedQualities.Add((byte)Math.Min(summedQuality, byte.MaxValue));
                    }
                    else if (_nifyDisagreements)
                    {
                        // we have disagreeing bases AND we chose to always Nify them
                        stitchedBases.Add('N');
                        stitchedQualities.Add(0);
                    }
                    else if ((byte)forwardReadIndexer.QualityAtIndex >= reverseReadIndexer.QualityAtIndex)
                    {
                        // Original stitching implementation -- TODO, reconcile this with new reqs.
                        // Forward wins (ties included). Its quality is kept only if the losing base
                        // was below the minimum basecall quality.
                        stitchedBases.Add((char)forwardReadIndexer.BaseAtIndex);
                        if (reverseReadIndexer.QualityAtIndex < _minBasecallQuality)
                        {
                            stitchedQualities.Add((byte)forwardReadIndexer.QualityAtIndex);
                        }
                        else
                        {
                            // this was a high Q disagreement, and dangerous! we will filter this base.
                            stitchedQualities.Add(0);
                        }
                    }
                    else
                    {
                        // Reverse wins; same rule with the roles swapped.
                        stitchedBases.Add((char)reverseReadIndexer.BaseAtIndex);
                        if (forwardReadIndexer.QualityAtIndex < _minBasecallQuality)
                        {
                            stitchedQualities.Add((byte)reverseReadIndexer.QualityAtIndex);
                        }
                        else
                        {
                            // this was a high Q disagreement, and dangerous! we will filter this base.
                            stitchedQualities.Add(0);
                        }
                    }
                }

                forwardReadIndexer.Increment();
                reverseReadIndexer.Increment();
                break;

            default:
                throw new ArgumentOutOfRangeException();
        }
    }

    // Validate stitched cigar
    var r2CigarLength = read2.CigarData.Cast<CigarOp>().Sum(op => (int)op.Length);
    var r1CigarLength = read1.CigarData.Cast<CigarOp>().Sum(op => (int)op.Length);
    var stitchedCigarLength = stitchingInfo.StitchedCigar.Cast<CigarOp>().Sum(op => (int)op.Length);

    var earliestStart = Math.Min(read1.ClipAdjustedPosition, read2.ClipAdjustedPosition);
    var latestEnd = Math.Max(read1.ClipAdjustedPosition + r1CigarLength, read2.ClipAdjustedPosition + r2CigarLength);

    if (stitchedCigarLength != (latestEnd - earliestStart))
    {
        // TODO what is really the point of this???
        // This mismatch is only logged and counted; it does not abort the merge.
        if (_debug)
        {
            Logger.WriteToLog(string.Format(
                "Attempted stitched cigar {0} is not consistent with component reads {1}:{2} and {3}:{4}",
                stitchingInfo.StitchedCigar, read1.Position, read1.CigarData, read2.Position, read2.CigarData));
        }

        _statusCounter.AddDebugStatusCount("Attempted stitched cigar not consistent with component reads");
    }

    // TODO investigate if these are ever worth handling
    if (stitchingInfo.StitchedCigar.Count > 0 && stitchingInfo.StitchedCigar.GetReadSpan() != stitchedBases.Count)
    {
        if (_debug)
        {
            Logger.WriteToLog(string.Format("Invalid cigar '{0}': does not match length {1} of read ({2})",
                stitchingInfo.StitchedCigar, stitchedBases.Count, read1.Name));
        }

        _statusCounter.AddDebugStatusCount("Invalid cigar does not match length of read");
        return null;
    }

    var mergedRead = new Read(read1.Chromosome, new BamAlignment
    {
        Name = read1.Name,
        Bases = string.Join("", stitchedBases),
        Position = Math.Min(read1.Position - 1, read2.Position - 1),
        Qualities = stitchedQualities.ToArray(),
        CigarData = stitchingInfo.StitchedCigar
    })
    {
        StitchedCigar = stitchingInfo.StitchedCigar,
        CigarDirections = new CigarDirection(stitchingInfo.StitchedDirections.ToString())
    };

    return mergedRead;
}
/// <summary>
/// Attempts to merge the two partner reads of <paramref name="set"/> into one stitched read and
/// adds the outcome to <c>set.ReadsForProcessing</c>.
/// </summary>
/// <param name="set">Alignment set holding both partner reads; both must be non-null.</param>
/// <returns>
/// True when a merged read (stitched, or N-ified when <c>_nifyUnstitchablePairs</c> is set) was added;
/// false otherwise. On most failure paths the two original reads are added individually; the
/// "can't figure out the cigar" and homopolymer-bridge paths return false without adding anything.
/// </returns>
/// <exception cref="Exception">
/// Any failure inside the method (including a missing read) is rethrown as an <see cref="Exception"/>
/// whose message contains the read name, the original message and stack trace, and whose
/// <see cref="Exception.InnerException"/> is the original exception.
/// </exception>
public bool TryStitch(AlignmentSet set)
{
    try
    {
        if (set.PartnerRead1 == null || set.PartnerRead2 == null)
        {
            throw new ArgumentException("Set has missing read.");
        }

        if (IsStitchable(set))
        {
            // Assumption is that exactly one read is first mate
            var r1IsFirstMate = !set.PartnerRead2.IsFirstMate;

            // Assumption is that exactly one read is forward and one read is reverse, and each component read is only one direction
            // GetStitchedCigar returns null if cigars can't possibly agree
            var stitchingInfo = _cigarReconciler.GetStitchedCigar(
                set.PartnerRead1.CigarData,
                set.PartnerRead1.Position,
                set.PartnerRead2.CigarData,
                set.PartnerRead2.Position,
                set.PartnerRead1.SequencedBaseDirectionMap.First() == DirectionType.Reverse,
                set.IsOutie,
                set.PartnerRead1.Sequence,
                set.PartnerRead2.Sequence,
                set.PartnerRead1.Qualities,
                set.PartnerRead2.Qualities,
                r1IsFirstMate);

            // Too many disagreeing bases is treated the same as "cigars cannot agree".
            if (stitchingInfo != null && stitchingInfo.NumDisagreeingBases > _thresholdNumDisagreeingBases)
            {
                stitchingInfo = null;
            }

            if (stitchingInfo != null)
            {
                var stitchedCigar = stitchingInfo.StitchedCigar;
                if (stitchedCigar == null) // there's an overlap but we can't figure out the cigar - TODO revisit
                {
                    _statusCounter.AddDebugStatusCount("Reads overlap but we can't figure out the cigar");
                    return false;
                }

                // Returns null if unable to generate consensus
                var mergedRead = stitchingInfo.IsSimple
                    ? _readMerger.GenerateConsensusReadForSimple(set.PartnerRead1, set.PartnerRead2, stitchingInfo, set.IsOutie)
                    : _readMerger.GenerateConsensusRead(set.PartnerRead1, set.PartnerRead2, stitchingInfo, set.IsOutie);

                if (mergedRead != null)
                {
                    mergedRead.BamAlignment.RefID = set.PartnerRead1.BamAlignment.RefID;
                    mergedRead.IsDuplex = set.PartnerRead1.IsDuplex || set.PartnerRead2.IsDuplex;
                    mergedRead.CigarDirections = stitchingInfo.StitchedDirections;
                    mergedRead.BamAlignment.MapQuality = Math.Max(set.PartnerRead1.MapQuality, set.PartnerRead2.MapQuality);

                    if (_dontStitchHomopolymerBridge)
                    {
                        var bridgeAnchored = OverlapEvaluator.BridgeAnchored(mergedRead);
                        if (!bridgeAnchored)
                        {
                            _statusCounter.AddDebugStatusCount("Overlapping bases are repeat, cannot reliably stitch");
                            return false;
                        }
                    }

                    set.ReadsForProcessing.Add(mergedRead);
                    _statusCounter.AddDebugStatusCount("Reads succesfully merge");
                    return true;
                }
            }
        }

        // If we didn't return true already, stitching failed.
        if (_debug)
        {
            Logger.WriteToLog("Stitching failed on read " + set.PartnerRead1.Name);
        }

        if (_nifyUnstitchablePairs && IsStitchable(set))
        {
            // TODO consider removing this functionality.
            // Give a merged, Nified read if the pairs are stitchable (i.e. overlap) but conflicting
            try
            {
                var mergedRead = _readMerger.GenerateNifiedMergedRead(set, _useSoftclippedBases);
                mergedRead.BamAlignment.RefID = set.PartnerRead1.BamAlignment.RefID;
                mergedRead.IsDuplex = set.PartnerRead1.IsDuplex || set.PartnerRead2.IsDuplex;
                mergedRead.BamAlignment.MapQuality = Math.Max(set.PartnerRead1.MapQuality, set.PartnerRead2.MapQuality);

                set.ReadsForProcessing.Add(mergedRead);
                _statusCounter.AddDebugStatusCount("Unstitchable pair N-ified");
                return true;
            }
            catch (Exception e)
            {
                // Deliberate best effort: log the failure and fall back to the individual reads.
                Logger.WriteExceptionToLog(e);
                _statusCounter.AddDebugStatusCount("Unstitchable pair unable to Nify, returned individually");
                set.ReadsForProcessing.Add(set.PartnerRead1);
                set.ReadsForProcessing.Add(set.PartnerRead2);
            }
        }
        else
        {
            _statusCounter.AddDebugStatusCount("Unstitchable pair returned individually");
            set.ReadsForProcessing.Add(set.PartnerRead1);
            set.ReadsForProcessing.Add(set.PartnerRead2);
        }

        return false;
    }
    catch (Exception e)
    {
        // The failure being reported may itself be a missing set/read, so the name must be
        // resolved without dereferencing a null; otherwise a NullReferenceException raised here
        // would hide the real error.
        var failedRead = set == null ? null : (set.PartnerRead1 ?? set.PartnerRead2);
        var readName = failedRead == null ? null : failedRead.Name;

        // Pass the caught exception itself as the inner exception so its type and data are preserved.
        throw new Exception("Stitching failed for read '" + readName + "': " + e.Message + "..." + e.StackTrace, e);
    }
}
/// <summary>
/// Tries to stitch the two partner reads of <paramref name="set"/> into a single read and appends
/// the result to <c>set.ReadsForProcessing</c>.
/// </summary>
/// <param name="set">Alignment set carrying both partner reads.</param>
/// <returns>
/// True if a merged read was appended (a consensus read, or an N-ified read when
/// <c>_nifyUnstitchablePairs</c> is set and the pair is stitchable); false otherwise.
/// </returns>
/// <exception cref="ArgumentException">Either partner read is null.</exception>
public bool TryStitch(AlignmentSet set)
{
    if (set.PartnerRead1 == null || set.PartnerRead2 == null)
    {
        throw new ArgumentException("Set has missing read.");
    }

    if (IsStitchable(set))
    {
        // Assumption is that exactly one read is forward and one read is reverse, and each component read is only one direction
        // GetStitchedCigar returns null if cigars can't possibly agree
        var reconciled = _cigarReconciler.GetStitchedCigar(
            set.PartnerRead1.CigarData,
            set.PartnerRead1.Position,
            set.PartnerRead2.CigarData,
            set.PartnerRead2.Position,
            set.PartnerRead1.SequencedBaseDirectionMap.First() == DirectionType.Reverse,
            set.IsOutie);

        if (reconciled != null)
        {
            // there's an overlap but we can't figure out the cigar - TODO revisit
            if (reconciled.StitchedCigar == null)
            {
                _statusCounter.AddDebugStatusCount("Reads overlap but we can't figure out the cigar");
                return false;
            }

            // Returns null if unable to generate consensus
            var consensus = _readMerger.GenerateConsensusRead(set.PartnerRead1, set.PartnerRead2, reconciled, set.IsOutie);
            if (consensus != null)
            {
                consensus.BamAlignment.RefID = set.PartnerRead1.BamAlignment.RefID;
                consensus.IsDuplex = set.PartnerRead1.IsDuplex || set.PartnerRead2.IsDuplex;
                consensus.CigarDirections = reconciled.StitchedDirections;
                consensus.BamAlignment.MapQuality = Math.Max(set.PartnerRead1.MapQuality, set.PartnerRead2.MapQuality);

                set.ReadsForProcessing.Add(consensus);
                _statusCounter.AddDebugStatusCount("Reads succesfully merge");
                return true;
            }
        }
    }

    // Reaching this point means no consensus read was produced.
    if (_debug)
    {
        Logger.WriteToLog("Stitching failed on read " + set.PartnerRead1.Name);
    }

    var nifyPair = _nifyUnstitchablePairs && IsStitchable(set);
    if (!nifyPair)
    {
        // Hand the two reads back untouched.
        _statusCounter.AddDebugStatusCount("Unstitchable pair returned individually");
        set.ReadsForProcessing.Add(set.PartnerRead1);
        set.ReadsForProcessing.Add(set.PartnerRead2);
        return false;
    }

    // Give a merged, Nified read if the pairs are stitchable (i.e. overlap) but conflicting
    var nified = _readMerger.GenerateNifiedMergedRead(set, _useSoftclippedBases);
    nified.BamAlignment.RefID = set.PartnerRead1.BamAlignment.RefID;
    nified.IsDuplex = set.PartnerRead1.IsDuplex || set.PartnerRead2.IsDuplex;
    nified.BamAlignment.MapQuality = Math.Max(set.PartnerRead1.MapQuality, set.PartnerRead2.MapQuality);

    set.ReadsForProcessing.Add(nified);
    _statusCounter.AddDebugStatusCount("Unstitchable pair N-ified");
    return true;
}
/// <summary>
/// Builds a merged read from <paramref name="read1"/> and <paramref name="read2"/> by walking the stitched
/// cigar (expanded to one char per position) together with the expanded direction list. Bases and
/// qualities are accumulated in the reusable <c>_stitchedBases</c> / <c>_stitchedQualities</c> buffers,
/// which are cleared on entry. Agreement / disagreement counters on <paramref name="stitchingInfo"/>
/// are incremented as stitched positions are processed.
/// </summary>
/// <param name="read1">First read of the pair; also supplies the name and chromosome of the merged read.</param>
/// <param name="read2">Second read of the pair.</param>
/// <param name="stitchingInfo">Stitched cigar and directions; its NumAgreements, NumNDisagreements and NumDisagreeingBases are updated here.</param>
/// <param name="isOutie">When <c>_ignoreProbeSoftclips</c> is set, selects which read's start index may be advanced by the ignored probe prefix bases.</param>
/// <returns>The merged read, or null when the stitched cigar's read span does not match the number of bases produced.</returns>
/// <exception cref="InvalidDataException">A forward-only or reverse-only position has no base available in the corresponding read.</exception>
/// <exception cref="ArgumentOutOfRangeException">A direction other than forward, reverse or stitched was encountered.</exception>
public Read GenerateConsensusReadForSimple(Read read1, Read read2, StitchingInfo stitchingInfo, bool isOutie)
{
    _stitchedBases.Clear();
    _stitchedQualities.Clear();

    var startIndexInR1 = 0;
    var startIndexInR2 = 0;

    var r1PrefixClip = (int)read1.CigarData.GetPrefixClip();
    var r2PrefixClip = (int)read2.CigarData.GetPrefixClip();
    var r1SuffixClipEnd = (int)read1.CigarData.GetReadSpan();
    var r2SuffixClipEnd = (int)read2.CigarData.GetReadSpan();
    var r1SuffixClipBegin = r1SuffixClipEnd - read1.CigarData.GetSuffixClip();
    var r2SuffixClipBegin = r2SuffixClipEnd - read2.CigarData.GetSuffixClip();

    if (!_useSoftclippedBases)
    {
        if (r2PrefixClip == 0)
        {
            // If we're not using softclipped bases to count toward stitching-ness, we need to fast-forward ahead of the softclip bases in R2.
            // That way, we essentially ignore the R2 softclipped bases and start from the "real" calls once we get to the Reverse-only region.
            // NOTE(review): this only runs when r2PrefixClip == 0, so as written it appears to add zero — confirm intent.
            startIndexInR2 += (int)read2.CigarData.GetPrefixClip();
        }
    }

    if (_ignoreProbeSoftclips)
    {
        if (isOutie)
        {
            startIndexInR2 += stitchingInfo.IgnoredProbePrefixBases;
        }
        else if (r2PrefixClip == 0)
        {
            startIndexInR1 += stitchingInfo.IgnoredProbePrefixBases;
        }
    }

    var r1Indexer = new ReadIndexer(startIndexInR1);
    var r2Indexer = new ReadIndexer(startIndexInR2);
    ReadIndexer forwardReadIndexer;
    ReadIndexer reverseReadIndexer;
    var read1Reverse = false;

    // Assumption is that exactly one read is forward and one read is reverse, and each component read is only one direction
    if (read1.SequencedBaseDirectionMap.First() == DirectionType.Forward)
    {
        forwardReadIndexer = r1Indexer;
        reverseReadIndexer = r2Indexer;
    }
    else
    {
        read1Reverse = true;
        forwardReadIndexer = r2Indexer;
        reverseReadIndexer = r1Indexer;
    }

    // Clip boundaries expressed per direction. They depend only on which read is the reverse one,
    // so they are resolved once here instead of on every cigar position.
    var forwardPrefixClip = read1Reverse ? r2PrefixClip : r1PrefixClip;
    var reversePrefixClip = read1Reverse ? r1PrefixClip : r2PrefixClip;
    var reverseSuffixClipEnd = read1Reverse ? r1SuffixClipEnd : r2SuffixClipEnd;
    var forwardSuffixClipEnd = read1Reverse ? r2SuffixClipEnd : r1SuffixClipEnd;
    var forwardSuffixClipBegin = read1Reverse ? r2SuffixClipBegin : r1SuffixClipBegin;
    var reverseSuffixClipBegin = read1Reverse ? r1SuffixClipBegin : r2SuffixClipBegin;

    // Distance between the clip-adjusted starts of the two reads, seen from each side.
    var r1SoftclipBeforeR2 = read2.ClipAdjustedPosition - read1.ClipAdjustedPosition;
    var r2SoftclipBeforeR1 = read1.ClipAdjustedPosition - read2.ClipAdjustedPosition;

    var expandedDirections = stitchingInfo.StitchedDirections.Expand();
    var expandedCigar = stitchingInfo.StitchedCigar.ExpandToChars();

    for (int i = 0; i < expandedCigar.Count; i++)
    {
        var cigarType = expandedCigar[i];
        var direction = expandedDirections[i];

        // Deletions contribute no base to the merged read.
        if (cigarType == 'D')
        {
            continue;
        }

        // Indexes are captured before any StartIndexing/Increment call below; the base lookups
        // further down intentionally use these captured values.
        var r1Index = r1Indexer.Index;
        var r2Index = r2Indexer.Index;

        if (r1SoftclipBeforeR2 > 0)
        {
            if (r1Index == r1SoftclipBeforeR2 && !r2Indexer.StartedIndexing)
            {
                r2Indexer.StartIndexing();
            }
        }
        else if (r2SoftclipBeforeR1 > 0)
        {
            if (r2Index == r2SoftclipBeforeR1 && !r1Indexer.StartedIndexing)
            {
                r1Indexer.StartIndexing();
            }
        }
        else
        {
            r1Indexer.StartIndexing();
            r2Indexer.StartIndexing();
        }

        // Start moving in read if needed
        switch (direction)
        {
            case DirectionType.Forward:
                if (!forwardReadIndexer.StartedIndexing)
                {
                    forwardReadIndexer.StartIndexing();
                }

                break;

            case DirectionType.Reverse:
                if (!reverseReadIndexer.StartedIndexing)
                {
                    reverseReadIndexer.StartIndexing();
                }

                break;

            case DirectionType.Stitched:
                if (!forwardReadIndexer.StartedIndexing)
                {
                    forwardReadIndexer.StartIndexing();
                }

                if (!reverseReadIndexer.StartedIndexing)
                {
                    reverseReadIndexer.StartIndexing();
                }

                break;
        }

        var forwardIndex = read1Reverse ? r2Index : r1Index;
        var reverseIndex = read1Reverse ? r1Index : r2Index;

        // If R1 & R2 are both in prefix softclips, favor R2 as more "real" and skip over the R1 base
        if (forwardReadIndexer.StartedIndexing && forwardIndex >= 0 && forwardIndex < forwardPrefixClip)
        {
            if (reverseReadIndexer.StartedIndexing && reverseIndex >= 0 && reverseIndex < reversePrefixClip)
            {
                direction = DirectionType.Reverse;
                forwardReadIndexer.Increment();
            }
        }

        // If R1 & R2 are both in suffix softclips, favor R1 as more "real" and skip over the R2 base
        if (reverseReadIndexer.StartedIndexing && reverseIndex >= reverseSuffixClipBegin && reverseIndex < reverseSuffixClipEnd)
        {
            if (forwardReadIndexer.StartedIndexing && forwardIndex >= forwardSuffixClipBegin && forwardIndex < forwardSuffixClipEnd)
            {
                direction = DirectionType.Forward;
                reverseReadIndexer.Increment();
            }
        }

        // Load the base/quality at the captured index of each read, or null when it is outside the read.
        if (r1Index >= 0 && r1Index < read1.BamAlignment.Bases.Length)
        {
            r1Indexer.BaseAtIndex = read1.BamAlignment.Bases[r1Index];
            r1Indexer.QualityAtIndex = read1.Qualities[r1Index];
        }
        else
        {
            r1Indexer.BaseAtIndex = null;
            r1Indexer.QualityAtIndex = null;
        }

        if (r2Index >= 0 && r2Index < read2.BamAlignment.Bases.Length)
        {
            r2Indexer.BaseAtIndex = read2.BamAlignment.Bases[r2Index];
            r2Indexer.QualityAtIndex = read2.Qualities[r2Index];
        }
        else
        {
            r2Indexer.BaseAtIndex = null;
            r2Indexer.QualityAtIndex = null;
        }

        switch (direction)
        {
            case DirectionType.Forward:
                if (forwardReadIndexer.BaseAtIndex == null)
                {
                    throw new InvalidDataException("Forward base at index " + forwardReadIndexer.Index + " is null.");
                }

                _stitchedBases.Add((char)forwardReadIndexer.BaseAtIndex);
                _stitchedQualities.Add((byte)forwardReadIndexer.QualityAtIndex);
                forwardReadIndexer.Increment();
                break;

            case DirectionType.Reverse:
                if (reverseReadIndexer.BaseAtIndex == null)
                {
                    throw new InvalidDataException("Reverse base at index " + reverseReadIndexer.Index + " is null.");
                }

                _stitchedBases.Add((char)reverseReadIndexer.BaseAtIndex);
                _stitchedQualities.Add((byte)reverseReadIndexer.QualityAtIndex);
                reverseReadIndexer.Increment();
                break;

            case DirectionType.Stitched:
                // When either read has no base here nothing is emitted for this position,
                // but both indexers still advance below.
                if (forwardReadIndexer.BaseAtIndex != null && reverseReadIndexer.BaseAtIndex != null)
                {
                    if (forwardReadIndexer.BaseAtIndex == reverseReadIndexer.BaseAtIndex)
                    {
                        // Agreement: keep the base, sum the qualities, cap at MaxBaseQuality.
                        _stitchedBases.Add((char)forwardReadIndexer.BaseAtIndex);
                        var sumQuality = Convert.ToInt32((byte)forwardReadIndexer.QualityAtIndex) + Convert.ToInt32((byte)reverseReadIndexer.QualityAtIndex);
                        var stitchedQuality = sumQuality > MaxBaseQuality ? MaxBaseQuality : sumQuality;
                        stitchingInfo.NumAgreements++;
                        _stitchedQualities.Add((byte)stitchedQuality);
                    }
                    else if (!_treatNasDisagreement && (reverseReadIndexer.BaseAtIndex == 'N' || reverseReadIndexer.QualityAtIndex == 0))
                    {
                        // Reverse base is N or quality 0: take the forward base with its own quality.
                        _stitchedBases.Add((char)forwardReadIndexer.BaseAtIndex);
                        var sumQuality = Convert.ToInt32((byte)forwardReadIndexer.QualityAtIndex);
                        var stitchedQuality = sumQuality > MaxBaseQuality ? MaxBaseQuality : sumQuality;
                        stitchingInfo.NumNDisagreements++;
                        _stitchedQualities.Add((byte)stitchedQuality);
                    }
                    else if (!_treatNasDisagreement && (forwardReadIndexer.BaseAtIndex == 'N' || forwardReadIndexer.QualityAtIndex == 0))
                    {
                        // Forward base is N or quality 0: take the reverse base with its own quality.
                        _stitchedBases.Add((char)reverseReadIndexer.BaseAtIndex);
                        var sumQuality = Convert.ToInt32((byte)reverseReadIndexer.QualityAtIndex);
                        var stitchedQuality = sumQuality > MaxBaseQuality ? MaxBaseQuality : sumQuality;
                        stitchingInfo.NumNDisagreements++;
                        _stitchedQualities.Add((byte)stitchedQuality);
                    }
                    else
                    {
                        // the bases disagree...
                        stitchingInfo.NumDisagreeingBases++;
                        if (_nifyDisagreements)
                        {
                            // we have disagreeing bases AND we chose to always Nify them
                            _stitchedBases.Add('N');
                            _stitchedQualities.Add(0);
                        }
                        else
                        {
                            if ((byte)forwardReadIndexer.QualityAtIndex >= reverseReadIndexer.QualityAtIndex)
                            {
                                // Original stitching implementation -- TODO, reconcile this with new reqs.
                                _stitchedBases.Add((char)forwardReadIndexer.BaseAtIndex);
                                if (reverseReadIndexer.QualityAtIndex < _minBasecallQuality)
                                {
                                    _stitchedQualities.Add((byte)forwardReadIndexer.QualityAtIndex);
                                }
                                else
                                {
                                    // this was a high Q disagreement, and dangerous! we will filter this base.
                                    _stitchedQualities.Add(0);
                                }
                            }
                            else
                            {
                                _stitchedBases.Add((char)reverseReadIndexer.BaseAtIndex);
                                if (forwardReadIndexer.QualityAtIndex < _minBasecallQuality)
                                {
                                    _stitchedQualities.Add((byte)reverseReadIndexer.QualityAtIndex);
                                }
                                else
                                {
                                    // this was a high Q disagreement, and dangerous! we will filter this base.
                                    _stitchedQualities.Add(0);
                                }
                            }
                        }
                    }
                }

                forwardReadIndexer.Increment();
                reverseReadIndexer.Increment();
                break;

            default:
                throw new ArgumentOutOfRangeException();
        }
    }

    // Validate stitched cigar
    var r2CigarLength = read2.CigarData.GetCigarSpan();
    var r1CigarLength = read1.CigarData.GetCigarSpan();
    var stitchedCigarLength = stitchingInfo.StitchedCigar.GetCigarSpan();

    var earliestStart = Math.Min(read1.ClipAdjustedPosition, read2.ClipAdjustedPosition);
    var latestEnd = Math.Max(read1.ClipAdjustedPosition + r1CigarLength, read2.ClipAdjustedPosition + r2CigarLength);

    if (stitchedCigarLength != (latestEnd - earliestStart))
    {
        // TODO what is really the point of this???
        // This mismatch is only logged and counted; it does not abort the merge.
        if (_debug)
        {
            Logger.WriteToLog(string.Format(
                "Attempted stitched cigar {0} is not consistent with component reads {1}:{2} and {3}:{4}",
                stitchingInfo.StitchedCigar, read1.Position, read1.CigarData, read2.Position, read2.CigarData));
        }

        _statusCounter.AddDebugStatusCount("Attempted stitched cigar not consistent with component reads");
    }

    // TODO investigate if these are ever worth handling
    if (stitchingInfo.StitchedCigar.Count > 0 && stitchingInfo.StitchedCigar.GetReadSpan() != _stitchedBases.Count)
    {
        if (_debug)
        {
            Logger.WriteToLog(string.Format("Invalid cigar '{0}': does not match length {1} of read ({2})",
                stitchingInfo.StitchedCigar, _stitchedBases.Count, read1.Name));
        }

        _statusCounter.AddDebugStatusCount("Invalid cigar does not match length of read");
        return null;
    }

    var mergedRead = new Read(read1.Chromosome, new BamAlignment
    {
        Name = read1.Name,
        Bases = GetSequenceFromArray(_stitchedBases),
        Position = Math.Min(read1.Position - 1, read2.Position - 1),
        Qualities = _stitchedQualities.ToArray(),
        CigarData = stitchingInfo.StitchedCigar
    })
    {
        StitchedCigar = stitchingInfo.StitchedCigar
    };

    return mergedRead;
}