/// <summary>
/// Decides whether <paramref name="alignment"/> should be left out of processing.
/// When a read is skipped, the reason is recorded as a debug status count.
/// </summary>
/// <param name="alignment">The alignment to evaluate.</param>
/// <returns>True when the read should be skipped; false otherwise.</returns>
protected override bool ShouldSkipRead(BamAlignment alignment)
{
    // The checks run in a fixed order; the first one that matches supplies the reason.
    string skipReason = null;

    if (!_filterPairLowMapQ && alignment.MapQuality > 0 && alignment.MapQuality < _minMapQuality)
    {
        // Only applied when pair-level low-mapQ filtering is off; a mapQ of exactly 0 is exempt here.
        skipReason = "Skipped read below mapQ";
    }
    else if (alignment.IsSupplementaryAlignment())
    {
        skipReason = "Skipped supplementary";
    }
    else if (alignment.IsSecondary())
    {
        skipReason = "Skipped secondary";
    }
    else if (_filterForProperPairs && !alignment.IsProperPair())
    {
        skipReason = "Skipped improper pair";
    }

    if (skipReason == null)
    {
        return false;
    }

    _statusCounter.AddDebugStatusCount(skipReason);
    return true;
}
/// <summary>
/// Decides whether <paramref name="alignment"/> should be left out of processing.
/// Unmapped reads, supplementary alignments, improper pairs (when proper-pair filtering is on)
/// and reads below the minimum mapping quality are skipped, in that order of precedence.
/// </summary>
/// <param name="alignment">The alignment to evaluate.</param>
/// <returns>True when the read should be skipped; false otherwise.</returns>
protected override bool ShouldSkipRead(BamAlignment alignment)
{
    // Conditional operators evaluate lazily, so later checks only run when earlier ones did not match.
    string skipReason =
        !alignment.IsMapped() ? "Skipped not mapped"
        : alignment.IsSupplementaryAlignment() ? "Skipped supplementary"
        : (_filterForProperPairs && !alignment.IsProperPair()) ? "Skipped improper pair"
        : alignment.MapQuality < _minMapQuality ? "Skipped low map quality"
        : null;

    var shouldSkip = skipReason != null;
    if (shouldSkip)
    {
        _statusCounter.AddDebugStatusCount(skipReason);
    }

    return shouldSkip;
}
/// <summary>
/// Returns true for alignments that should be ignored: anything supplementary or non-primary,
/// and, when proper-pair filtering is enabled, anything not flagged as a proper pair.
/// </summary>
/// <param name="alignment">The alignment to evaluate.</param>
private bool ShouldSkipRead(BamAlignment alignment)
{
    var isPlainPrimary = !alignment.IsSupplementaryAlignment() && alignment.IsPrimaryAlignment();
    if (!isPlainPrimary)
    {
        return true;
    }

    return _filterForProperPairs && !alignment.IsProperPair();
}
/// <summary>
/// Second pass: copies every read from <paramref name="tmpFile"/> to <paramref name="writer"/>,
/// first updating the mate-related fields of any read whose mate has an entry in the remapping table.
/// </summary>
/// <param name="tmpFile">Path of the temporary BAM file to re-read.</param>
/// <param name="writer">Destination for the (possibly adjusted) reads.</param>
private void AdjustMates(string tmpFile, BamWriter writer)
{
    Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);

    var current = new BamAlignment();
    using (var tmpReader = new BamReader(tmpFile))
    {
        while (tmpReader.GetNextAlignment(ref current, false))
        {
            // The lookup key is the read name plus the number of the *other* mate
            // (2 when this read is the first mate, otherwise 1).
            var otherMateNumber = current.IsFirstMate() ? 2 : 1;
            var lookupKey = string.Format("{0}-{1}", current.Name, otherMateNumber);

            RemapInfo mate;
            if (_remappings.TryGetValue(lookupKey, out mate))
            {
                if (mate.Start == -1)
                {
                    // A start of -1 is treated as "mate is no longer mapped".
                    current.SetIsMateUnmapped(true);
                    current.SetIsProperPair(false);
                    current.FragmentLength = 0;
                }
                else
                {
                    current.MatePosition = mate.Start;
                }

                if (current.IsMateMapped() && current.IsProperPair())
                {
                    int currentEnd = current.Position + (int)current.CigarData.GetReferenceSpan() - 1;

                    // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                    if (current.Position < mate.Start)
                    {
                        current.FragmentLength = mate.End - current.Position + 1;
                    }
                    else
                    {
                        current.FragmentLength = mate.Start - currentEnd - 1;
                    }
                }
            }

            // Reads without a remapped mate are written through unchanged.
            writer.WriteAlignment(current);
        }
    }
}
/// <summary>
/// Verifies that a <c>Read</c> built from a <c>BamAlignment</c> carries over the alignment's fields and flags.
/// </summary>
public void FromBam()
{
    // NOTE(review): Bases has 6 characters but only 3 qualities are supplied — confirm this mismatch is intentional.
    var alignment = new BamAlignment
    {
        Bases = "ATCTTA",
        Position = 100,
        MatePosition = 500,
        Name = "test",
        CigarData = new CigarAlignment("5M1S"),
        MapQuality = 10,
        Qualities = new[] { (byte)10, (byte)20, (byte)30 }
    };
    alignment.SetIsDuplicate(true);
    alignment.SetIsProperPair(true);
    alignment.SetIsSecondaryAlignment(true);
    alignment.SetIsUnmapped(true);

    var read = new Read("chr1", alignment);

    // xUnit's Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.Equal("chr1", read.Chromosome);
    Assert.Equal(alignment.Bases, read.Sequence);
    // The read's positions are expected to be the alignment's positions shifted by +1.
    Assert.Equal(alignment.Position + 1, read.Position);
    Assert.Equal(alignment.MatePosition + 1, read.MatePosition);
    Assert.Equal(alignment.Name, read.Name);
    Assert.Equal(alignment.CigarData, read.CigarData);
    Assert.Equal(alignment.IsMapped(), read.IsMapped);
    Assert.Equal(alignment.IsProperPair(), read.IsProperPair);
    Assert.Equal(alignment.IsPrimaryAlignment(), read.IsPrimaryAlignment);
    Assert.Equal(alignment.IsDuplicate(), read.IsPcrDuplicate);

    foreach (var direction in read.SequencedBaseDirectionMap)
    {
        Assert.Equal(DirectionType.Forward, direction);
    }

    for (var i = 0; i < read.Qualities.Length; i++)
    {
        Assert.Equal(alignment.Qualities[i], read.Qualities[i]);
    }
}
/// <summary>
/// Stores a copy of <paramref name="alignment"/> in the slot matching its kind (primary, supplementary
/// or secondary) and its read number, and marks the pair improper if the alignment is not a proper pair.
/// </summary>
/// <param name="alignment">Alignment to add; it is copied, the original is not retained.</param>
/// <param name="readNumber">
/// Which read the alignment belongs to. With <c>NA</c>, a primary alignment fills Read1 first and then Read2,
/// while supplementary and secondary alignments go to the Read1 lists.
/// </param>
/// <exception cref="InvalidDataException">The targeted primary slot is already occupied.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// A supplementary or secondary alignment was given an unrecognized <paramref name="readNumber"/>.
/// </exception>
public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
{
    var copy = new BamAlignment(alignment);

    var isPrimary = alignment.IsPrimaryAlignment();
    var isSupplementary = alignment.IsSupplementaryAlignment();

    if (isPrimary && !isSupplementary)
    {
        switch (readNumber)
        {
            case ReadNumber.NA:
                if (Read1 != null && Read2 != null)
                {
                    throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
                }

                if (Read1 == null)
                {
                    Read1 = copy;
                }
                else
                {
                    Read2 = copy;
                }
                break;

            case ReadNumber.Read1:
                if (Read1 != null)
                {
                    throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
                }
                Read1 = copy;
                break;

            case ReadNumber.Read2:
                if (Read2 != null)
                {
                    throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
                }
                Read2 = copy;
                break;

            // Any other read number is ignored for primary alignments (no default case on purpose).
        }
    }
    else if (isSupplementary)
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                if (Read1SupplementaryAlignments == null)
                {
                    Read1SupplementaryAlignments = new List<BamAlignment>();
                }
                Read1SupplementaryAlignments.Add(copy);
                break;

            case ReadNumber.Read2:
                if (Read2SupplementaryAlignments == null)
                {
                    Read2SupplementaryAlignments = new List<BamAlignment>();
                }
                Read2SupplementaryAlignments.Add(copy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }
    else
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                if (Read1SecondaryAlignments == null)
                {
                    Read1SecondaryAlignments = new List<BamAlignment>();
                }
                Read1SecondaryAlignments.Add(copy);
                break;

            case ReadNumber.Read2:
                if (Read2SecondaryAlignments == null)
                {
                    Read2SecondaryAlignments = new List<BamAlignment>();
                }
                Read2SecondaryAlignments.Add(copy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }

    // Set as improper once we add any alignment that is flagged as improper
    if (!alignment.IsProperPair())
    {
        IsImproper = true;
    }
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">Alignment to consider; only mapped, primary, properly paired reads with a mapped mate are used.</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index</param>
/// <param name="samePositionReadNames">
/// Names of reads whose position equals their mate's position and that have been seen once;
/// used so that only one read of such a pair is binned.
/// </param>
/// <param name="usableFragmentCount">number of usable fragments</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<SampleGenomicBin> bins, ref int binIndexStart)
{
    if (!alignment.IsMapped()) { return; }
    if (!alignment.IsMateMapped()) { return; }
    if (!alignment.IsPrimaryAlignment()) { return; }
    if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
    string readName = alignment.Name;

    // Check whether we have binned the fragment using the mate.
    // TryGetValue fetches the bin index in the same lookup that tests for presence.
    int mateBinIndex;
    if (readNameToBinIndex.TryGetValue(readName, out mateBinIndex))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[mateBinIndex].Count--;
        }
        readNameToBinIndex.Remove(readName); // clean up
        return;
    }

    if (duplicateFailedQCLowQuality) { return; }

    if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?

    if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

    // handle the case where alignment.Position == alignment.MatePosition
    if (alignment.Position == alignment.MatePosition)
    {
        // HashSet.Add returns false when the name is already present, i.e. this is the second read of the pair.
        if (!samePositionReadNames.Add(readName))
        {
            samePositionReadNames.Remove(readName);
            return;
        }
    }

    if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely to the left of the fragment
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }
    if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

    // now Bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[readName] = bestBinIndex;
    }
}
/// <summary>
/// Stores a copy of <paramref name="alignment"/> in the slot matching its kind (primary, supplementary
/// or secondary) and its read number. Primary alignments also update the fragment size, pair orientation,
/// primary read count and min/max position. The pair is marked improper if the alignment is not a proper pair.
/// </summary>
/// <param name="alignment">Alignment to add; it is copied, the original is not retained.</param>
/// <param name="readNumber">
/// Which read the alignment belongs to. With <c>NA</c>, a primary alignment fills Read1 first and then Read2,
/// while supplementary and secondary alignments go to the Read1 lists.
/// </param>
/// <exception cref="InvalidDataException">The targeted primary slot is already occupied.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// A supplementary or secondary alignment was given an unrecognized <paramref name="readNumber"/>.
/// </exception>
public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
{
    var alignmentCopy = new BamAlignment(alignment);

    if (alignmentCopy.IsPrimaryAlignment() && !alignmentCopy.IsSupplementaryAlignment())
    {
        // Fragment-level properties are taken from the first primary alignment with a non-zero fragment length.
        if (FragmentSize == 0)
        {
            FragmentSize = Math.Abs(alignmentCopy.FragmentLength);

            var isReverse = alignmentCopy.IsReverseStrand();

            // Can be either F1R2 or F2R1: the two mates must be on opposite strands.
            var normalOrientation = isReverse != alignmentCopy.IsMateReverseStrand();

            if (normalOrientation && alignmentCopy.RefID == alignmentCopy.MateRefID)
            {
                // RF: on the same reference, the forward-strand read starts after the reverse-strand read.
                var forwardStartsAfterReverse = isReverse
                    ? alignmentCopy.MatePosition > alignmentCopy.Position
                    : alignmentCopy.Position > alignmentCopy.MatePosition;

                if (forwardStartsAfterReverse)
                {
                    normalOrientation = false;
                }
            }

            NormalPairOrientation = normalOrientation;
        }

        NumPrimaryReads++;

        if (alignmentCopy.Position > MaxPosition)
        {
            MaxPosition = alignmentCopy.Position;
        }
        if (alignmentCopy.Position < MinPosition)
        {
            MinPosition = alignmentCopy.Position;
        }

        if (readNumber == ReadNumber.NA)
        {
            if (Read1 != null && Read2 != null)
            {
                throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
            }

            if (Read1 == null)
            {
                Read1 = alignmentCopy;
            }
            else
            {
                Read2 = alignmentCopy;
            }
        }
        else if (readNumber == ReadNumber.Read1)
        {
            if (Read1 != null)
            {
                throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
            }
            Read1 = alignmentCopy;
        }
        else if (readNumber == ReadNumber.Read2)
        {
            if (Read2 != null)
            {
                throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
            }
            Read2 = alignmentCopy;
        }
    }
    else if (alignmentCopy.IsSupplementaryAlignment())
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA: // unknown read number: filed with read 1
                if (Read1SupplementaryAlignments == null)
                {
                    Read1SupplementaryAlignments = new List<BamAlignment>();
                }
                Read1SupplementaryAlignments.Add(alignmentCopy);
                break;

            case ReadNumber.Read2:
                if (Read2SupplementaryAlignments == null)
                {
                    Read2SupplementaryAlignments = new List<BamAlignment>();
                }
                Read2SupplementaryAlignments.Add(alignmentCopy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }
    else
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA: // unknown read number: filed with read 1
                if (Read1SecondaryAlignments == null)
                {
                    Read1SecondaryAlignments = new List<BamAlignment>();
                }
                Read1SecondaryAlignments.Add(alignmentCopy);
                break;

            case ReadNumber.Read2:
                if (Read2SecondaryAlignments == null)
                {
                    Read2SecondaryAlignments = new List<BamAlignment>();
                }
                Read2SecondaryAlignments.Add(alignmentCopy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }

    // Set as improper once we add any alignment that is flagged as improper
    if (!alignmentCopy.IsProperPair())
    {
        IsImproper = true;
    }
}
/// <summary>
/// Reads the alignments of one chromosome from an indexed bam file and records, in <paramref name="observed"/>,
/// the start position of every read that passes the filters.
/// </summary>
/// <param name="bamFile">Bam file to read alignments from. Its index is expected at bamFile + ".bai".</param>
/// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
/// <param name="chromosome">Name of the reference sequence to load.</param>
/// <param name="coverageMode">
/// Binary writes 1 into <c>observed.Data</c>; every other mode calls <c>observed.Set</c>.
/// GCContentWeighted additionally fills <paramref name="fragmentLengths"/>.
/// </param>
/// <param name="observed">Hit array, indexed by alignment start position, that receives the observations.</param>
/// <param name="fragmentLengths">
/// Per-position fragment lengths, written only in GCContentWeighted mode. Values are clamped to [0, Int16.MaxValue].
/// </param>
/// <exception cref="Exception">The bam index file does not exist.</exception>
/// <exception cref="ApplicationException">The chromosome is not among the bam file's reference sequences.</exception>
static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
{
    // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
    string indexPath = bamFile + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
    }

    using (BamReader reader = new BamReader(bamFile))
    {
        int desiredRefIndex = reader.GetReferenceIndex(chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome, bamFile));
        }

        bool result = reader.Jump(desiredRefIndex, 0);
        if (!result)
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file.  That is not uncommon e.g. for truseq amplicon.
            return;
        }

        int readCount = 0;
        int keptReadCount = 0;

        // NOTE(review): the header text is not used here; the call is kept in case it has side effects on the reader — confirm and remove.
        reader.GetHeader();

        BamAlignment alignment = new BamAlignment();
        while (reader.GetNextAlignment(ref alignment, true))
        {
            readCount++;

            // Flag check - Require reads to be aligned, passing filter, non-duplicate:
            if (!alignment.IsMapped()) { continue; }
            if (alignment.IsFailedQC()) { continue; }
            if (alignment.IsDuplicate()) { continue; }
            if (alignment.IsReverseStrand()) { continue; }
            if (!alignment.IsMainAlignment()) { continue; }

            // Require the alignment to start with 35 bases of non-indel:
            if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35) { continue; }

            if (isPairedEnd && !alignment.IsProperPair()) { continue; }

            // quit if the current reference index is different from the desired reference index.
            // desiredRefIndex can never be -1 at this point, so no separate check for an unset RefID is needed.
            if (alignment.RefID != desiredRefIndex) { break; }

            keptReadCount++;
            if (coverageMode == CanvasCoverageMode.Binary)
            {
                observed.Data[alignment.Position] = 1;
            }
            else
            {
                observed.Set(alignment.Position);
            }

            // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
            }
        }

        Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
    }
}
/// <summary>
/// Reads the alignments of one chromosome from an indexed bam file and records, in <paramref name="observed"/>,
/// the start position of every read that passes the filters.
/// </summary>
/// <param name="bamFile">Bam file to read alignments from. Its index is expected at bamFile + ".bai".</param>
/// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
/// <param name="chromosome">Name of the reference sequence to load.</param>
/// <param name="coverageMode">
/// Binary writes 1 into <c>observed.Data</c>; every other mode calls <c>observed.Set</c>.
/// GCContentWeighted additionally fills <paramref name="fragmentLengths"/>.
/// </param>
/// <param name="observed">Hit array, indexed by alignment start position, that receives the observations.</param>
/// <param name="fragmentLengths">
/// Per-position fragment lengths, written only in GCContentWeighted mode. Values are clamped to [0, Int16.MaxValue].
/// </param>
/// <exception cref="Exception">The bam index file does not exist.</exception>
/// <exception cref="ApplicationException">The chromosome is not among the bam file's reference sequences.</exception>
static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
{
    // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
    string indexPath = bamFile + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
    }

    using (BamReader reader = new BamReader(bamFile))
    {
        int desiredRefIndex = reader.GetReferenceIndex(chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome, bamFile));
        }

        bool jumped = reader.Jump(desiredRefIndex, 0);
        if (!jumped)
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file.  That is not uncommon e.g. for truseq amplicon.
            return;
        }

        int readCount = 0;
        int keptReadCount = 0;

        // NOTE(review): the header text is not used here; the call is kept in case it has side effects on the reader — confirm and remove.
        reader.GetHeader();

        BamAlignment alignment = new BamAlignment();
        while (reader.GetNextAlignment(ref alignment, true))
        {
            readCount++;

            // Flag check - Require reads to be aligned, passing filter, non-duplicate:
            if (!alignment.IsMapped())
            {
                continue;
            }
            if (alignment.IsFailedQC())
            {
                continue;
            }
            if (alignment.IsDuplicate())
            {
                continue;
            }
            if (alignment.IsReverseStrand())
            {
                continue;
            }
            if (!alignment.IsMainAlignment())
            {
                continue;
            }

            // Require the alignment to start with 35 bases of non-indel:
            if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
            {
                continue;
            }

            if (isPairedEnd && !alignment.IsProperPair())
            {
                continue;
            }

            // quit if the current reference index is different from the desired reference index.
            // desiredRefIndex can never be -1 at this point, so no separate check for an unset RefID is needed.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            keptReadCount++;
            if (coverageMode == CanvasCoverageMode.Binary)
            {
                observed.Data[alignment.Position] = 1;
            }
            else
            {
                observed.Set(alignment.Position);
            }

            // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
            }
        }

        Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
    }
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">Alignment to consider; only mapped, primary, properly paired reads with a mapped mate are used.</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index</param>
/// <param name="samePositionReadNames">
/// Names of reads whose position equals their mate's position and that have been seen once;
/// used so that only one read of such a pair is binned.
/// </param>
/// <param name="usableFragmentCount">number of usable fragments</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
{
    if (!alignment.IsMapped()) { return; }
    if (!alignment.IsMateMapped()) { return; }
    if (!alignment.IsPrimaryAlignment()) { return; }
    if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);
    string readName = alignment.Name;

    // Check whether we have binned the fragment using the mate.
    // TryGetValue fetches the bin index in the same lookup that tests for presence.
    int mateBinIndex;
    if (readNameToBinIndex.TryGetValue(readName, out mateBinIndex))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[mateBinIndex].Count--;
        }
        readNameToBinIndex.Remove(readName); // clean up
        return;
    }

    if (duplicateFailedQCLowQuality) { return; }

    if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?

    if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

    // handle the case where alignment.Position == alignment.MatePosition
    if (alignment.Position == alignment.MatePosition)
    {
        // HashSet.Add returns false when the name is already present, i.e. this is the second read of the pair.
        if (!samePositionReadNames.Add(readName))
        {
            samePositionReadNames.Remove(readName);
            return;
        }
    }

    if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely to the left of the fragment
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }
    if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

    // now Bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[readName] = bestBinIndex;
    }
}