/// <summary>
/// Searches the supplied indel sites for one whose flanking mapped positions
/// (widened by <paramref name="anchorSize"/>) are both contained in the other read.
/// </summary>
/// <param name="readIndelPositions">Indel sites to test; null or empty yields null.</param>
/// <param name="readWithoutIndels">Read that is asked whether it contains the flanking positions.</param>
/// <param name="readWithIndels">
/// Read the sites belong to. Its RefID is used for the containment checks, and when it is
/// on the reverse strand the sites are visited in reverse order.
/// </param>
/// <param name="anchorSize">Number of bases subtracted from / added to the flanking positions.</param>
/// <returns>
/// The PreviousMappedPosition of the first site (in visit order) that is covered on both
/// sides, or null when no site is covered.
/// </returns>
public static int? AnyIndelCoveredInMate(IEnumerable<IndelSite> readIndelPositions, BamAlignment readWithoutIndels, BamAlignment readWithIndels, int anchorSize = 0)
{
    if (readIndelPositions == null || !readIndelPositions.Any())
    {
        return null;
    }

    var sitesInVisitOrder = readWithIndels.IsReverseStrand()
        ? readIndelPositions.Reverse()
        : readIndelPositions;

    foreach (var site in sitesInVisitOrder)
    {
        // Left flank first; the right flank is only examined when the left one is covered.
        var leftAnchor = site.PreviousMappedPosition - anchorSize;
        if (!readWithoutIndels.ContainsPosition(leftAnchor, readWithIndels.RefID))
        {
            continue;
        }

        var rightAnchor = site.NextMappedPosition + anchorSize;
        if (readWithoutIndels.ContainsPosition(rightAnchor, readWithIndels.RefID))
        {
            return site.PreviousMappedPosition;
        }
    }

    return null;
}
/// <summary>
/// Increments the per-read counters on <paramref name="indelMetrics"/> for one supporting read.
/// </summary>
/// <param name="bamAlignment">Read supplying strand and mate information.</param>
/// <param name="isReputable">When true, ReputableSupport is incremented.</param>
/// <param name="stitched">When true, Stitched is incremented instead of a strand counter.</param>
/// <param name="indelMetrics">Evidence object whose counters are updated.</param>
/// <param name="isRepeat">When true, IsRepeat is incremented.</param>
private static void AddReadLevelIndelMetrics(BamAlignment bamAlignment, bool isReputable, bool stitched, IndelEvidence indelMetrics, bool isRepeat)
{
    indelMetrics.Observations++;

    // Exactly one of Stitched / Reverse / Forward is bumped per read.
    if (stitched)
    {
        indelMetrics.Stitched++;
    }
    else if (bamAlignment.IsReverseStrand())
    {
        indelMetrics.Reverse++;
    }
    else
    {
        indelMetrics.Forward++;
    }

    if (isReputable)
    {
        indelMetrics.ReputableSupport++;
    }

    if (isRepeat)
    {
        indelMetrics.IsRepeat++;
    }

    // Counted as split when the mate is unmapped or sits on a different reference.
    var mateOnSameReference = bamAlignment.IsMateMapped() && bamAlignment.MateRefID == bamAlignment.RefID;
    if (!mateOnSameReference)
    {
        indelMetrics.IsSplit++;
    }
}
/// <summary>
/// Reads the next alignment from the BAM reader and mirrors its summary fields into
/// the current serialized-alignment record. Sets the end flag when no alignment is returned.
/// </summary>
public override void MoveToNextRecord()
{
    bool gotRecord = _bamReader.GetNextAlignment(ref _currentBamAlignment, false);
    _isEnd = !gotRecord;

    if (gotRecord)
    {
        // Values are copied into the existing record rather than building a new one.
        var source = _currentBamAlignment;

        _currentSerializedAlignment.RefID = source.RefID;
        _currentSerializedAlignment.Position = source.Position;
        _currentSerializedAlignment.AlignmentFlag = source.AlignmentFlag;
        _currentSerializedAlignment.FragmentLength = source.FragmentLength;
        _currentSerializedAlignment.MapQuality = source.MapQuality;
        _currentSerializedAlignment.MatePosition = source.MatePosition;
        _currentSerializedAlignment.MateRefID = source.MateRefID;
        _currentSerializedAlignment.IsReverseStrand = source.IsReverseStrand();
        _currentSerializedAlignment.Name = source.Name;
    }
}
/// <summary>
/// Checks whether indels found in only one read of a pair are contradicted by a "good" mate,
/// i.e. a mate that spans the indel site (per AnyIndelCoveredInMate) without carrying it.
/// </summary>
/// <remarks>
/// A read is considered good when its adjusted NM (NM minus its indel adjustment) is within
/// <paramref name="mismatchesAllowed"/> and either its raw NM is also within the limit or it
/// has at most one indel. Nothing is checked unless both NM values are available and at least
/// one read is good.
/// </remarks>
/// <param name="r1IndelPositions">Indel sites of read 1; may be null.</param>
/// <param name="r2IndelPositions">Indel sites of read 2; may be null.</param>
/// <param name="read1">First read of the pair.</param>
/// <param name="read2">Second read of the pair.</param>
/// <param name="disagree">Set to true when a covered, unshared indel site is found; otherwise false.</param>
/// <param name="mismatchesAllowed">Maximum NM for a read to count as clean.</param>
/// <param name="r1IndelAdjustment">Amount subtracted from read 1's NM before the clean check.</param>
/// <param name="r2IndelAdjustment">Amount subtracted from read 2's NM before the clean check.</param>
/// <param name="softclipWeakOne">When true, SoftclipAfterIndel is called on the read that is not good.</param>
/// <param name="r1Nm">NM of read 1 if already known; otherwise the "NM" tag is read.</param>
/// <param name="r2Nm">NM of read 2 if already known; otherwise the "NM" tag is read.</param>
/// <returns>A new list containing <paramref name="read1"/> and <paramref name="read2"/>, in that order.</returns>
private static List<BamAlignment> IndelsDisagreeWithStrongMate(List<IndelSite> r1IndelPositions, List<IndelSite> r2IndelPositions, BamAlignment read1, BamAlignment read2, out bool disagree, int mismatchesAllowed = 1, int r1IndelAdjustment = 0, int r2IndelAdjustment = 0, bool softclipWeakOne = true, int? r1Nm = null, int? r2Nm = null)
{
    var checkBoth = true;

    // TODO maybe also check if one of the reads has ins AND del
    // TODO if we've grabbed this info here, propagate it out so we don't do it twice
    // TODO indel adjustment should only actually remove insertions, no??
    var read1Nm = r1Nm ?? read1.GetIntTag("NM");
    var read2Nm = r2Nm ?? read2.GetIntTag("NM");
    var read1AdjustedNm = read1Nm - r1IndelAdjustment;
    var read2AdjustedNm = read2Nm - r2IndelAdjustment;

    disagree = false;

    // When both lists exist they are reduced via GetUniqueIndelSites (presumably to the sites
    // not shared with the other read — confirm against that helper); otherwise each list is
    // used as-is, which may be null.
    var bothHaveIndelLists = r1IndelPositions != null && r2IndelPositions != null;
    var r1IndelPositionsUnique = bothHaveIndelLists ? GetUniqueIndelSites(r1IndelPositions, r2IndelPositions) : r1IndelPositions;
    var r2IndelPositionsUnique = bothHaveIndelLists ? GetUniqueIndelSites(r2IndelPositions, r1IndelPositions) : r2IndelPositions;

    // No sense doing further checks if there's nothing to disagree over...
    // A null list counts as empty here; calling Any() on it directly would throw.
    if ((r1IndelPositionsUnique != null && r1IndelPositionsUnique.Any()) ||
        (r2IndelPositionsUnique != null && r2IndelPositionsUnique.Any()))
    {
        var r1AdjustedClean = read1AdjustedNm <= mismatchesAllowed;
        var r2AdjustedClean = read2AdjustedNm <= mismatchesAllowed;
        var r1Clean = read1Nm <= mismatchesAllowed;
        var r2Clean = read2Nm <= mismatchesAllowed;

        var r1NumIndels = r1IndelPositions?.Count;
        var r2NumIndels = r2IndelPositions?.Count;

        var r1IsGood = r1AdjustedClean && (r1Clean || r1NumIndels <= 1);
        var r2IsGood = r2AdjustedClean && (r2Clean || r2NumIndels <= 1);

        if ((read1Nm != null && read2Nm != null) && (r1IsGood || r2IsGood))
        {
            if (r1IsGood)
            {
                // Read 1 is trusted: does it span an indel that only read 2 has?
                var disagreeingPos = AnyIndelCoveredInMate(r2IndelPositionsUnique, read1, read2);
                if (disagreeingPos != null)
                {
                    disagree = true;
                    if (softclipWeakOne && !r2IsGood)
                    {
                        SoftclipAfterIndel(read2, read2.IsReverseStrand(), disagreeingPos.Value);
                    }
                }
                else if (checkBoth)
                {
                    // Reverse direction: flag the disagreement but never softclip read 1 here.
                    disagreeingPos = AnyIndelCoveredInMate(r1IndelPositionsUnique, read2, read1);
                    if (disagreeingPos != null)
                    {
                        disagree = true;
                    }
                }
            }
            else
            {
                // Only read 2 is good: mirror image of the branch above.
                var disagreeingPos = AnyIndelCoveredInMate(r1IndelPositionsUnique, read2, read1);
                if (disagreeingPos != null)
                {
                    disagree = true;
                    if (softclipWeakOne && !r1IsGood)
                    {
                        SoftclipAfterIndel(read1, read1.IsReverseStrand(), disagreeingPos.Value);
                    }
                }
                else if (checkBoth)
                {
                    disagreeingPos = AnyIndelCoveredInMate(r2IndelPositionsUnique, read1, read2);
                    if (disagreeingPos != null)
                    {
                        disagree = true;
                    }
                }
            }
        }
    }

    // If both are good, and they disagree, should still say they disagree?
    return new List<BamAlignment> { read1, read2 };
}
/// <summary>
/// Serializes an alignment into the backing byte arrays at the current offset and records
/// an index entry for it.
/// </summary>
/// <param name="al">Alignment to serialize.</param>
/// <returns>
/// True when the alignment was stored; false when the backing storage reports that no
/// byte array is available for the required offset.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Serialization failed even after moving to the start of the next sub-array.
/// </exception>
public bool AddSerializedAlignment(ref BamAlignment al)
{
    byte[] buffer = null;
    int cursor = 0;

    if (!_serializedAlignments.GetByteArrayAndOffset(_offset, ref buffer, ref cursor))
    {
        // Out of space
        return false;
    }

    int cursorAtStart = cursor;
    bool fitted = SerializeAlignment(ref al, ref buffer, ref cursor);

    if (!fitted)
    {
        // It didn't fit in the current sub-array, so retry at the start of the next one.
        // Advancing by (max - offset % max) lands on the next multiple of the sub-array
        // size: with max = 1000 and offset = 1987, 1000 - 987 = 13 and 1987 + 13 = 2000,
        // which is the first element of array number 2 (indexes start at 0).
        var maxArraySize = _serializedAlignments.GetMaxArraySize();
        _offset += maxArraySize - (_offset % maxArraySize);

        if (!_serializedAlignments.GetByteArrayAndOffset(_offset, ref buffer, ref cursor))
        {
            // Out of space
            return false;
        }

        cursorAtStart = cursor;
        if (!SerializeAlignment(ref al, ref buffer, ref cursor))
        {
            // We just checked that we have space. This should never fail.
            throw new InvalidOperationException("Error: Check available memory. Serialization of alignment failed.");
        }
    }

    int alignmentSize = cursor - cursorAtStart;

    if (_bamAlignmentList.Count == 1000)
    {
        // Assume the first 1000 records are representative of the typical size.
        // Add 20% to ensure a memory reallocation is unlikely.
        var bytesPerRecord = _offset / 1000;
        _bamAlignmentList.Capacity = (int)(1.2 * _serializedAlignments.SizeInBytes / bytesPerRecord);
    }

    var indexEntry = new SerializedBamAlignment(
        _offset,
        alignmentSize,
        al.RefID,
        al.Position,
        al.AlignmentFlag,
        al.FragmentLength,
        al.MapQuality,
        al.MatePosition,
        al.MateRefID,
        al.IsReverseStrand());
    _bamAlignmentList.Add(indexEntry);

    _offset += (UInt64)alignmentSize;
    return true;
}
/// <summary>
/// Stores a copy of an alignment in this read pair, filing it as a primary, supplementary
/// or secondary alignment of read 1 or read 2.
/// </summary>
/// <param name="alignment">Alignment to add; a copy is stored.</param>
/// <param name="readNumber">
/// Which read the alignment belongs to. For primaries, NA fills Read1 first and then Read2;
/// for supplementary and secondary alignments, NA is filed with read 1.
/// </param>
/// <exception cref="InvalidDataException">The targeted primary slot is already occupied.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// A non-primary alignment was given a read number other than NA, Read1 or Read2.
/// </exception>
public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
{
    var alignmentCopy = new BamAlignment(alignment);
    var isSupplementary = alignmentCopy.IsSupplementaryAlignment();

    if (alignmentCopy.IsPrimaryAlignment() && !isSupplementary)
    {
        if (FragmentSize == 0)
        {
            FragmentSize = Math.Abs(alignmentCopy.FragmentLength);

            // Can be either F1R2 or F2R1: the two mates must be on opposite strands.
            var onReverseStrand = alignmentCopy.IsReverseStrand();
            var oppositeStrands = onReverseStrand != alignmentCopy.IsMateReverseStrand();

            // RF: on the same reference, the forward read starts after its reverse mate.
            var isRf = false;
            if (oppositeStrands && alignmentCopy.RefID == alignmentCopy.MateRefID)
            {
                isRf = onReverseStrand
                    ? alignmentCopy.MatePosition > alignmentCopy.Position
                    : alignmentCopy.Position > alignmentCopy.MatePosition;
            }

            NormalPairOrientation = oppositeStrands && !isRf;
        }

        NumPrimaryReads++;

        if (alignmentCopy.Position > MaxPosition)
        {
            MaxPosition = alignment.Position;
        }

        if (alignmentCopy.Position < MinPosition)
        {
            MinPosition = alignment.Position;
        }

        switch (readNumber)
        {
            case ReadNumber.NA:
                if (Read1 != null && Read2 != null)
                {
                    throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
                }

                if (Read1 == null)
                {
                    Read1 = alignmentCopy;
                }
                else
                {
                    Read2 = alignmentCopy;
                }
                break;

            case ReadNumber.Read1:
                if (Read1 != null)
                {
                    throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
                }

                Read1 = alignmentCopy;
                break;

            case ReadNumber.Read2:
                if (Read2 != null)
                {
                    throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
                }

                Read2 = alignmentCopy;
                break;
        }
    }
    else if (isSupplementary)
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                // Lists are created on first use.
                (Read1SupplementaryAlignments ?? (Read1SupplementaryAlignments = new List<BamAlignment>())).Add(alignmentCopy);
                break;

            case ReadNumber.Read2:
                (Read2SupplementaryAlignments ?? (Read2SupplementaryAlignments = new List<BamAlignment>())).Add(alignmentCopy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }
    else
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                (Read1SecondaryAlignments ?? (Read1SecondaryAlignments = new List<BamAlignment>())).Add(alignmentCopy);
                break;

            case ReadNumber.Read2:
                (Read2SecondaryAlignments ?? (Read2SecondaryAlignments = new List<BamAlignment>())).Add(alignmentCopy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }

    // Set as improper once we add any alignment that is flagged as improper
    if (!alignmentCopy.IsProperPair())
    {
        IsImproper = true;
    }
}
/// <summary>
/// Reads the alignments of one chromosome from an indexed BAM file and records the start
/// position of every read that passes the filters.
/// </summary>
/// <param name="bamFile">Path of the BAM file; its index is expected at this path plus ".bai".</param>
/// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
/// <param name="chromosome">Name of the reference sequence to load.</param>
/// <param name="coverageMode">
/// Binary writes 1 into <c>observed.Data</c>; any other mode calls <c>observed.Set</c>.
/// GCContentWeighted additionally stores the fragment length.
/// </param>
/// <param name="observed">Hit array indexed by alignment start position.</param>
/// <param name="fragmentLengths">
/// Fragment length per start position, clamped to [0, Int16.MaxValue]; written only in
/// GCContentWeighted mode.
/// </param>
static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
{
    // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
    string indexPath = bamFile + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
    }

    using (BamReader reader = new BamReader(bamFile))
    {
        int desiredRefIndex = reader.GetReferenceIndex(chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome, bamFile));
        }

        if (!reader.Jump(desiredRefIndex, 0))
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for
            // this chromosome in this .bam file. That is not uncommon e.g. for truseq amplicon.
            return;
        }

        int readCount = 0;
        int keptReadCount = 0;
        reader.GetHeader(); // call retained from the original; its result is not used here
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            readCount++;

            // Flag check - require reads to be aligned, passing filter, non-duplicate,
            // forward-strand main alignments.
            bool failsFlagFilter = !alignment.IsMapped()
                                   || alignment.IsFailedQC()
                                   || alignment.IsDuplicate()
                                   || alignment.IsReverseStrand()
                                   || !alignment.IsMainAlignment();
            if (failsFlagFilter)
            {
                continue;
            }

            // Require the alignment to start with 35 bases of non-indel.
            bool startsWithLongMatch = alignment.CigarData[0].Type == 'M' && alignment.CigarData[0].Length >= 35;
            if (!startsWithLongMatch)
            {
                continue;
            }

            if (isPairedEnd && !alignment.IsProperPair())
            {
                continue;
            }

            // Quit if the current reference index is different from the desired reference index.
            int refID = alignment.RefID;
            if (refID != desiredRefIndex)
            {
                break;
            }

            if (refID == -1)
            {
                continue;
            }

            keptReadCount++;
            int position = alignment.Position;

            if (coverageMode == CanvasCoverageMode.Binary)
            {
                observed.Data[position] = 1;
            }
            else
            {
                observed.Set(position);
            }

            // Store fragment size, make sure it's within Int16 range and is positive (simplification for now).
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                int capped = Math.Min(Int16.MaxValue, alignment.FragmentLength);
                int nonNegative = Math.Max(capped, 0);
                fragmentLengths[position] = Convert.ToInt16(nonNegative);
            }
        }

        Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
    }
}
/// <summary>
/// Reads the alignments of one chromosome from an indexed BAM file and records the start
/// position of every read that passes the filters.
/// </summary>
/// <param name="bamFile">Path of the BAM file; its index is expected at this path plus ".bai".</param>
/// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
/// <param name="chromosome">Name of the reference sequence to load.</param>
/// <param name="coverageMode">
/// Binary writes 1 into <c>observed.Data</c>; any other mode calls <c>observed.Set</c>.
/// GCContentWeighted additionally stores the fragment length.
/// </param>
/// <param name="observed">Hit array indexed by alignment start position.</param>
/// <param name="fragmentLengths">
/// Fragment length per start position, clamped to [0, Int16.MaxValue]; written only in
/// GCContentWeighted mode.
/// </param>
static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
{
    // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
    string indexPath = bamFile + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
    }

    using (BamReader reader = new BamReader(bamFile))
    {
        int desiredRefIndex = reader.GetReferenceIndex(chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome, bamFile));
        }

        if (!reader.Jump(desiredRefIndex, 0))
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for
            // this chromosome in this .bam file. That is not uncommon e.g. for truseq amplicon.
            return;
        }

        int readCount = 0;
        int keptReadCount = 0;
        reader.GetHeader(); // call retained from the original; its result is not used here
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            readCount++;

            // Flag check - require reads to be aligned, passing filter, non-duplicate,
            // forward-strand main alignments.
            bool failsFlagFilter = !alignment.IsMapped()
                                   || alignment.IsFailedQC()
                                   || alignment.IsDuplicate()
                                   || alignment.IsReverseStrand()
                                   || !alignment.IsMainAlignment();
            if (failsFlagFilter)
            {
                continue;
            }

            // Require the alignment to start with 35 bases of non-indel.
            bool startsWithLongMatch = alignment.CigarData[0].Type == 'M' && alignment.CigarData[0].Length >= 35;
            if (!startsWithLongMatch)
            {
                continue;
            }

            if (isPairedEnd && !alignment.IsProperPair())
            {
                continue;
            }

            // Quit if the current reference index is different from the desired reference index.
            int refID = alignment.RefID;
            if (refID != desiredRefIndex)
            {
                break;
            }

            if (refID == -1)
            {
                continue;
            }

            keptReadCount++;
            int position = alignment.Position;

            if (coverageMode == CanvasCoverageMode.Binary)
            {
                observed.Data[position] = 1;
            }
            else
            {
                observed.Set(position);
            }

            // Store fragment size, make sure it's within Int16 range and is positive (simplification for now).
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                int capped = Math.Min(Int16.MaxValue, alignment.FragmentLength);
                int nonNegative = Math.Max(capped, 0);
                fragmentLengths[position] = Convert.ToInt16(nonNegative);
            }
        }

        Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
    }
}