/// <summary>
/// Decides whether the indexer should blacklist the given read.
/// </summary>
/// <param name="alignment">The read being examined.</param>
/// <returns>
/// True when the read is rejected by the low-mapping-quality filter, by the unmapped-read/unmapped-mate
/// filter, or because <c>ReadIsDuplicate</c> reports it as a duplicate; false otherwise.
/// </returns>
protected override bool ShouldBlacklistReadIndexer(BamAlignment alignment)
{
    // Low (but non-zero) mapping quality: blacklisted without recording any status count.
    if (_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
    {
        return true;
    }

    if (_filterPairUnmapped)
    {
        // The read's own mapped flag is tested first; the mate's flag is only consulted when the read is mapped.
        string skipReason = !alignment.IsMapped()
            ? "Skipped not mapped"
            : !alignment.IsMateMapped()
                ? "Skipped mate not mapped"
                : null;

        if (skipReason != null)
        {
            _statusCounter.AddDebugStatusCount(skipReason);
            return true;
        }
    }

    // The duplicate check must run only once per read: a de novo duplicate finder would otherwise
    // flag the read as a duplicate simply because it has already seen it.
    // Duplicates are blacklisted (not merely skipped) because when one mate is a duplicate the other
    // is presumed to be one too.
    // Known limitation: for a fusion read whose first-seen mate is not a duplicate but whose second
    // mate is, the first mate may already have been flushed to the bam (when fusions are not being
    // mated) before the second mate is recognised as a duplicate. This is considered a highly
    // unlikely degenerate case.
    if (!ReadIsDuplicate(alignment))
    {
        return false;
    }

    _statusCounter.AddStatusCount("Blacklisted Duplicates");
    return true;
}
/// <summary>
/// Updates the counters of <paramref name="indelMetrics"/> for one read that supports the indel.
/// </summary>
/// <param name="bamAlignment">The supporting read; its strand, mate-mapped flag and reference ids are inspected.</param>
/// <param name="isReputable">When true, <c>ReputableSupport</c> is incremented.</param>
/// <param name="stitched">When true, <c>Stitched</c> is incremented and no strand counter is touched.</param>
/// <param name="indelMetrics">The evidence record to update in place.</param>
/// <param name="isRepeat">When true, <c>IsRepeat</c> is incremented.</param>
private static void AddReadLevelIndelMetrics(BamAlignment bamAlignment, bool isReputable, bool stitched,
    IndelEvidence indelMetrics, bool isRepeat)
{
    // Every call counts as one observation.
    indelMetrics.Observations++;

    // Exactly one of Stitched / Reverse / Forward is incremented per read.
    if (stitched)
    {
        indelMetrics.Stitched++;
    }
    else if (bamAlignment.IsReverseStrand())
    {
        indelMetrics.Reverse++;
    }
    else
    {
        indelMetrics.Forward++;
    }

    if (isReputable)
    {
        indelMetrics.ReputableSupport++;
    }

    if (isRepeat)
    {
        indelMetrics.IsRepeat++;
    }

    // A read counts as split unless its mate is mapped and both share the same reference id.
    var mateOnSameReference = bamAlignment.IsMateMapped() && bamAlignment.MateRefID == bamAlignment.RefID;
    if (!mateOnSameReference)
    {
        indelMetrics.IsSplit++;
    }
}
/// <summary>
/// Second pass: re-reads the temporary bam and writes every read to <paramref name="writer"/>,
/// correcting the mate-related fields of reads whose mate has an entry in <c>_remappings</c>.
/// </summary>
/// <param name="tmpFile">Path of the temporary bam to read.</param>
/// <param name="writer">Writer that receives every read, adjusted or not.</param>
private void AdjustMates(string tmpFile, BamWriter writer)
{
    Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);

    var read = new BamAlignment();
    using (var reader = new BamReader(tmpFile))
    {
        while (reader.GetNextAlignment(ref read, false))
        {
            // The lookup key is built from the read name plus the *mate's* number:
            // 2 when this read is the first mate, 1 otherwise.
            var mateNumber = read.IsFirstMate() ? 2 : 1;
            var mateKey = string.Format("{0}-{1}", read.Name, mateNumber);

            RemapInfo info;
            if (_remappings.TryGetValue(mateKey, out info))
            {
                if (info.Start != -1)
                {
                    read.MatePosition = info.Start;
                }
                else
                {
                    // A start of -1 is handled as "mate is unmapped".
                    read.SetIsMateUnmapped(true);
                    read.SetIsProperPair(false);
                    read.FragmentLength = 0;
                }

                if (read.IsMateMapped() && read.IsProperPair())
                {
                    int readEnd = read.Position + (int)read.CigarData.GetReferenceSpan() - 1;

                    // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                    if (read.Position < info.Start)
                    {
                        // This read starts before its mate: span from this read's start to the mate's end.
                        read.FragmentLength = info.End - read.Position + 1;
                    }
                    else
                    {
                        // Otherwise: span from the mate's start to this read's end, expressed as a negative value.
                        read.FragmentLength = info.Start - readEnd - 1;
                    }
                }
            }

            // Reads without a remapped mate are written through unchanged.
            writer.WriteAlignment(read);
        }
    }
}
/// <summary>
/// Tells whether a read could overlap its mate.
/// </summary>
/// <param name="alignment">The read to examine.</param>
/// <returns>
/// True only when both the read and its mate are mapped, they share the same reference id, and their
/// start positions are no more than <c>_maxPairGap</c> apart.
/// </returns>
private bool MayOverlapMate(BamAlignment alignment)
{
    // Short-circuit order matches the individual checks: mate mapped, read mapped, same reference.
    var mappedToSameReference = alignment.IsMateMapped()
                                && alignment.IsMapped()
                                && alignment.RefID == alignment.MateRefID;

    return mappedToSameReference
           && Math.Abs(alignment.Position - alignment.MatePosition) <= _maxPairGap;
}
/// <summary>
/// Classifies a read on its own, without its mate in hand. The checks run in a fixed order and the
/// first one that matches determines the result.
/// </summary>
/// <param name="alignment">The read to classify.</param>
/// <returns>
/// <c>SplitChromosomes</c>, <c>MateUnmapped</c>, <c>Duplicate</c>, <c>LongFragment</c> (only when
/// <c>_considerInsertSize</c> is set), or <c>Unknown</c> when none of the checks match.
/// </returns>
private PairStatus SingleReadStatus(BamAlignment alignment)
{
    // Stitched reads will have split ref ids too but that is not the same thing, hence the IsPaired test.
    var referencesDiffer = alignment.RefID != alignment.MateRefID;
    if (referencesDiffer && alignment.IsPaired())
    {
        return PairStatus.SplitChromosomes;
    }

    // Either the mate is flagged unmapped while this read has no reference (-1),
    // or this read is flagged unmapped while the mate has no reference (-1).
    var mateUnmappedAndReadHasNoRef = !alignment.IsMateMapped() && alignment.RefID == -1;
    if (mateUnmappedAndReadHasNoRef || (!alignment.IsMapped() && alignment.MateRefID == -1))
    {
        return PairStatus.MateUnmapped;
    }

    if (alignment.IsDuplicate())
    {
        return PairStatus.Duplicate;
    }

    // Insert-size consideration is optional; a paired read that does not overlap its mate is a long fragment.
    var isLongFragment = _considerInsertSize && alignment.IsPaired() && !OverlapsMate(alignment);
    return isLongFragment ? PairStatus.LongFragment : PairStatus.Unknown;
}
/// <summary>
/// Decides whether the indexer should blacklist the given read.
/// </summary>
/// <param name="alignment">The read being examined.</param>
/// <returns>
/// True when the read is rejected by the low-mapping-quality filter, by the unmapped-read/unmapped-mate
/// filter, or because <c>ReadIsDuplicate</c> reports it as a duplicate; false otherwise.
/// </returns>
protected override bool ShouldBlacklistReadIndexer(BamAlignment alignment)
{
    // Low (but non-zero) mapping quality: blacklisted without recording any status count.
    if (_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
    {
        return true;
    }

    if (_filterPairUnmapped)
    {
        // The mapped flag is checked together with the reference id: some pairs have one mate mapped
        // and the other placed right next to it with mapq 0 but with mapping (chr:pos) information.
        // Requiring a reference id of -1 as well distinguishes those from truly unmapped reads.
        var readIsTrulyUnmapped = !alignment.IsMapped() && alignment.RefID == -1;
        if (readIsTrulyUnmapped)
        {
            _statusCounter.AddDebugStatusCount("Skipped not mapped");
            return true;
        }

        var mateIsTrulyUnmapped = !alignment.IsMateMapped() && alignment.MateRefID == -1;
        if (mateIsTrulyUnmapped)
        {
            _statusCounter.AddDebugStatusCount("Skipped mate not mapped");
            return true;
        }
    }

    // The duplicate check must run only once per read: a de novo duplicate finder would otherwise
    // flag the read as a duplicate simply because it has already seen it.
    // Duplicates are blacklisted (not merely skipped) because when one mate is a duplicate the other
    // is presumed to be one too.
    // Known limitation: for a fusion read whose first-seen mate is not a duplicate but whose second
    // mate is, the first mate may already have been flushed to the bam (when fusions are not being
    // mated) before the second mate is recognised as a duplicate. This is considered a highly
    // unlikely degenerate case.
    if (!ReadIsDuplicate(alignment))
    {
        return false;
    }

    _statusCounter.AddStatusCount("Blacklisted Duplicates");
    return true;
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">the read to process</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index; holds fragments binned via their first read until the mate is seen</param>
/// <param name="samePositionReadNames">names of reads already seen whose position equals their mate's position, so that only one read of such a pair is binned</param>
/// <param name="usableFragmentCount">number of usable fragments</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<SampleGenomicBin> bins, ref int binIndexStart)
{
    if (!alignment.IsMapped()) { return; }
    if (!alignment.IsMateMapped()) { return; }
    if (!alignment.IsPrimaryAlignment()) { return; }
    if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

    // Check whether we have binned the fragment using the mate (single lookup instead of ContainsKey + indexer)
    int binnedIndex;
    if (readNameToBinIndex.TryGetValue(alignment.Name, out binnedIndex))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[binnedIndex].Count--;
        }

        readNameToBinIndex.Remove(alignment.Name); // clean up

        // When both reads share a position, the first read also registered the name in
        // samePositionReadNames. This branch returns before the same-position handling below,
        // so drop the name here; otherwise it would stay in the set indefinitely.
        if (alignment.Position == alignment.MatePosition)
        {
            samePositionReadNames.Remove(alignment.Name);
        }
        return;
    }

    if (duplicateFailedQCLowQuality) { return; }
    if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?
    if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

    // handle the case where alignment.Position == alignment.MatePosition
    if (alignment.Position == alignment.MatePosition)
    {
        // Remove returns true only if the name was present, i.e. the other read of the pair was already seen.
        if (samePositionReadNames.Remove(alignment.Name)) { return; }
        samePositionReadNames.Add(alignment.Name);
    }

    if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely to the left of the fragment
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }
    if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

    // now bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[alignment.Name] = bestBinIndex; // remembered so the mate can undo the binning
    }
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">the read to process</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index; holds fragments binned via their first read until the mate is seen</param>
/// <param name="samePositionReadNames">names of reads already seen whose position equals their mate's position, so that only one read of such a pair is binned</param>
/// <param name="usableFragmentCount">number of usable fragments</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
{
    if (!alignment.IsMapped()) { return; }
    if (!alignment.IsMateMapped()) { return; }
    if (!alignment.IsPrimaryAlignment()) { return; }
    if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

    // Check whether we have binned the fragment using the mate (single lookup instead of ContainsKey + indexer)
    int binnedIndex;
    if (readNameToBinIndex.TryGetValue(alignment.Name, out binnedIndex))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[binnedIndex].Count--;
        }

        readNameToBinIndex.Remove(alignment.Name); // clean up

        // When both reads share a position, the first read also registered the name in
        // samePositionReadNames. This branch returns before the same-position handling below,
        // so drop the name here; otherwise it would stay in the set indefinitely.
        if (alignment.Position == alignment.MatePosition)
        {
            samePositionReadNames.Remove(alignment.Name);
        }
        return;
    }

    if (duplicateFailedQCLowQuality) { return; }
    if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?
    if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

    // handle the case where alignment.Position == alignment.MatePosition
    if (alignment.Position == alignment.MatePosition)
    {
        // Remove returns true only if the name was present, i.e. the other read of the pair was already seen.
        if (samePositionReadNames.Remove(alignment.Name)) { return; }
        samePositionReadNames.Add(alignment.Name);
    }

    if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely to the left of the fragment
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }
    if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

    // now bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[alignment.Name] = bestBinIndex; // remembered so the mate can undo the binning
    }
}