/// <summary>
/// Decides whether an alignment should be ignored by the caller.
/// </summary>
/// <param name="alignment">Alignment whose flags are inspected.</param>
/// <returns>
/// <c>true</c> when the alignment is supplementary, is not primary, or (only while
/// <c>_filterForProperPairs</c> is set) is not flagged as a proper pair; <c>false</c> otherwise.
/// </returns>
private bool ShouldSkipRead(BamAlignment alignment)
{
    // Only non-supplementary primary alignments are candidates for being kept.
    var isPlainPrimary = !alignment.IsSupplementaryAlignment() && alignment.IsPrimaryAlignment();
    if (!isPlainPrimary)
    {
        return true;
    }

    // The proper-pair requirement is optional and only consulted when the filter is switched on.
    return _filterForProperPairs && !alignment.IsProperPair();
}
public void FromBam()
{
    // Checks that a Read constructed from a BamAlignment mirrors the alignment's fields and flags.
    var alignment = new BamAlignment
    {
        Bases = "ATCTTA",
        Position = 100,
        MatePosition = 500,
        Name = "test",
        CigarData = new CigarAlignment("5M1S"),
        MapQuality = 10,
        Qualities = new[] { (byte)10, (byte)20, (byte)30 }
    };
    alignment.SetIsDuplicate(true);
    alignment.SetIsProperPair(true);
    alignment.SetIsSecondaryAlignment(true);
    alignment.SetIsUnmapped(true);

    var read = new Read("chr1", alignment);

    // Assert.Equal takes (expected, actual): the alignment-derived value is the expectation and the
    // Read property is the value under test, so a failure message labels the two sides correctly.
    Assert.Equal("chr1", read.Chromosome);
    Assert.Equal(alignment.Bases, read.Sequence);

    // The read is expected to report positions one greater than the alignment's.
    Assert.Equal(alignment.Position + 1, read.Position);
    Assert.Equal(alignment.MatePosition + 1, read.MatePosition);

    Assert.Equal(alignment.Name, read.Name);
    Assert.Equal(alignment.CigarData, read.CigarData);
    Assert.Equal(alignment.IsMapped(), read.IsMapped);
    Assert.Equal(alignment.IsProperPair(), read.IsProperPair);
    Assert.Equal(alignment.IsPrimaryAlignment(), read.IsPrimaryAlignment);
    Assert.Equal(alignment.IsDuplicate(), read.IsPcrDuplicate);

    foreach (var direction in read.SequencedBaseDirectionMap)
    {
        Assert.Equal(DirectionType.Forward, direction);
    }

    for (var i = 0; i < read.Qualities.Length; i++)
    {
        Assert.Equal(alignment.Qualities[i], read.Qualities[i]);
    }
}
/// <summary>
/// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
/// Streams the records of <c>this.Chromosome</c> and passes every read that may overlap an upcoming
/// entry of <c>this.Variants</c> to <c>ProcessReadBases</c>.
/// </summary>
/// <param name="bamPath">Path of the bam file to read.</param>
/// <exception cref="ArgumentException">The reader reports a negative reference index for <c>this.Chromosome</c>.</exception>
protected void ProcessBamFile(string bamPath)
{
    Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
    int recordsSeen = 0;
    int variantCursor = 0;

    using (BamReader reader = new BamReader(bamPath))
    {
        BamAlignment read = new BamAlignment();
        int refID = reader.GetReferenceIndex(this.Chromosome);
        if (refID < 0)
        {
            throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
        }

        Console.WriteLine("Jump to refid {0} {1}", refID, this.Chromosome);
        reader.Jump(refID, 0);

        // The loop ends when the reader has no further alignment to hand out.
        while (reader.GetNextAlignment(ref read, false))
        {
            // A positionless read or a later reference means we are past the chromosome of interest.
            if (!read.HasPosition() || read.RefID > refID)
            {
                break;
            }

            // Not yet on the chromosome of interest.
            if (read.RefID < refID)
            {
                continue;
            }

            recordsSeen++;
            if (recordsSeen % 1000000 == 0)
            {
                Console.WriteLine("Record {0} at {1}...", recordsSeen, read.Position);
            }

            // Unaligned or otherwise non-count-worthy reads are dropped here.
            if (!read.IsPrimaryAlignment() || !read.IsMapped() || read.IsDuplicate() || read.MapQuality <= MinimumMapQ)
            {
                continue;
            }

            // Advance past variants that lie before this read's position.
            // NOTE(review): this only works if Variants is ordered by ReferencePosition - confirm.
            while (variantCursor < this.Variants.Count && this.Variants[variantCursor].ReferencePosition < read.Position)
            {
                variantCursor++;
            }

            // No variants left, so no later read can matter.
            if (variantCursor >= this.Variants.Count)
            {
                break;
            }

            // A read starting more than 1000 positions before the next variant is treated as out of reach.
            if (read.Position + 1000 < this.Variants[variantCursor].ReferencePosition)
            {
                continue;
            }

            // This read potentially overlaps the next variant (and further variants). Count bases!
            ProcessReadBases(read, variantCursor);
        }
    }

    Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
}
/// <summary>
/// Stores a copy of <paramref name="alignment"/> in the slot that matches its flags: a primary,
/// non-supplementary alignment goes to <c>Read1</c>/<c>Read2</c>; a supplementary alignment goes to a
/// supplementary list; anything else goes to a secondary list.
/// </summary>
/// <param name="alignment">Alignment to copy and store.</param>
/// <param name="readNumber">
/// Which read the alignment belongs to. With <c>ReadNumber.NA</c> a primary alignment fills
/// <c>Read1</c> first and <c>Read2</c> second, while supplementary and secondary alignments are
/// filed in the read 1 lists.
/// </param>
/// <exception cref="InvalidDataException">The primary slot the alignment would occupy is already taken.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="readNumber"/> is not a recognized value while adding a supplementary or secondary alignment.
/// </exception>
public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
{
    var copy = new BamAlignment(alignment);
    var isSupplementary = alignment.IsSupplementaryAlignment();

    if (!isSupplementary && alignment.IsPrimaryAlignment())
    {
        switch (readNumber)
        {
            case ReadNumber.NA:
                // Without an explicit read number the first free primary slot is used.
                if (Read1 == null)
                {
                    Read1 = copy;
                }
                else if (Read2 == null)
                {
                    Read2 = copy;
                }
                else
                {
                    throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
                }
                break;

            case ReadNumber.Read1:
                if (Read1 != null)
                {
                    throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
                }
                Read1 = copy;
                break;

            case ReadNumber.Read2:
                if (Read2 != null)
                {
                    throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
                }
                Read2 = copy;
                break;

            // Any other value leaves the primary slots untouched.
        }
    }
    else if (isSupplementary)
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                // The list is created on first use.
                (Read1SupplementaryAlignments ?? (Read1SupplementaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            case ReadNumber.Read2:
                (Read2SupplementaryAlignments ?? (Read2SupplementaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }
    else
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                (Read1SecondaryAlignments ?? (Read1SecondaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            case ReadNumber.Read2:
                (Read2SecondaryAlignments ?? (Read2SecondaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }

    // Set as improper once we add any alignment that is flagged as improper.
    if (!alignment.IsProperPair())
    {
        IsImproper = true;
    }
}
/// <summary>
/// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
/// Reads the bam records for <c>this.Chromosome</c> in file order and forwards each read that could
/// touch the next pending variant to <c>ProcessReadBases</c>.
/// </summary>
/// <param name="bamPath">Path of the bam file to read.</param>
/// <exception cref="ArgumentException">The reader reports a negative reference index for <c>this.Chromosome</c>.</exception>
protected void ProcessBamFile(string bamPath)
{
    Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);
    int recordTally = 0;
    int pendingVariant = 0;

    using (BamReader reader = new BamReader(bamPath))
    {
        BamAlignment read = new BamAlignment();
        int refID = reader.GetReferenceIndex(this.Chromosome);
        if (refID < 0)
        {
            throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
        }

        Console.WriteLine("Jump to refid {0} {1}", refID, this.Chromosome);
        reader.Jump(refID, 0);

        // Keep going while the reader yields positioned records that are not beyond our chromosome.
        while (reader.GetNextAlignment(ref read, false) && read.HasPosition() && read.RefID <= refID)
        {
            if (read.RefID < refID)
            {
                continue; // We're not yet on our chromosome of interest.
            }

            recordTally++;
            if (recordTally % 1000000 == 0)
            {
                Console.WriteLine("Record {0} at {1}...", recordTally, read.Position);
            }

            // Skip over unaligned or other non-count-worthy reads.
            bool notCountWorthy = !read.IsPrimaryAlignment()
                                  || !read.IsMapped()
                                  || read.IsDuplicate()
                                  || read.MapQuality <= MinimumMapQ;
            if (notCountWorthy)
            {
                continue;
            }

            // Scan forward through the variants list, to keep up with our reads.
            while (pendingVariant < this.Variants.Count && this.Variants[pendingVariant].ReferencePosition < read.Position)
            {
                pendingVariant++;
            }

            if (pendingVariant >= this.Variants.Count)
            {
                break; // Every variant has been passed; nothing left to count.
            }

            // Reads starting more than 1000 positions ahead of the next variant are not examined.
            bool tooFarUpstream = read.Position + 1000 < this.Variants[pendingVariant].ReferencePosition;
            if (tooFarUpstream)
            {
                continue;
            }

            // This read potentially overlaps next variant (and further variants). Count bases!
            ProcessReadBases(read, pendingVariant);
        }
    }

    Console.WriteLine("Looped over {0} bam records in all", recordTally);
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">Alignment to consider for binning.</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index</param>
/// <param name="samePositionReadNames">
/// Names of reads seen so far whose position equals their mate position. A name already present is
/// removed and the alignment is skipped; otherwise the name is added and the alignment proceeds.
/// </param>
/// <param name="usableFragmentCount">number of usable fragments</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<SampleGenomicBin> bins, ref int binIndexStart)
{
    if (!alignment.IsMapped()) { return; }
    if (!alignment.IsMateMapped()) { return; }
    if (!alignment.IsPrimaryAlignment()) { return; }
    if (!(alignment.IsPaired() && alignment.IsProperPair())) { return; }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

    // Check whether we have binned the fragment using the mate.
    // TryGetValue fetches the stored bin index in the same lookup that tests for presence.
    int mateBinIndex;
    if (readNameToBinIndex.TryGetValue(alignment.Name, out mateBinIndex))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[mateBinIndex].Count--;
        }
        readNameToBinIndex.Remove(alignment.Name); // clean up
        return;
    }

    if (duplicateFailedQCLowQuality) { return; }
    if (alignment.RefID != alignment.MateRefID) { return; } // does this ever happen?
    if (IsRightMostInPair(alignment)) { return; } // look at only one read of the pair

    // Handle the case where alignment.Position == alignment.MatePosition.
    if (alignment.Position == alignment.MatePosition)
    {
        // Remove returns true only when the name was present, i.e. this name was recorded earlier.
        if (samePositionReadNames.Remove(alignment.Name))
        {
            return;
        }
        samePositionReadNames.Add(alignment.Name);
    }

    if (alignment.FragmentLength == 0) { return; } // Janus-SRS-190: 0 when the information is unavailable

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely to the left of the fragment.
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }
    if (binIndexStart >= bins.Count) { return; } // all the remaining fragments are on the right of the last bin

    // now bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[alignment.Name] = bestBinIndex;
    }
}
/// <summary>
/// Finds the indels carried by an alignment and records evidence for them in <paramref name="lookup"/>.
/// </summary>
/// <param name="bamAlignment">Alignment to inspect.</param>
/// <param name="targetFinder">Finder used to extract indels from the alignment.</param>
/// <param name="lookup">Evidence keyed by indel string; a multi-indel key joins the member strings with "|".</param>
/// <param name="isReputable">Passed through to the metric update helpers.</param>
/// <param name="chrom">Chromosome name handed to the finder.</param>
/// <param name="minMapQuality">The alignment's map quality must be strictly greater than this value.</param>
/// <param name="stitched">Passed through to the metric update helpers.</param>
/// <returns>
/// The indels reported by the finder (possibly empty), or <c>null</c> when the alignment is not
/// examined: map quality not above the minimum, a single-op cigar, or a non-primary alignment.
/// </returns>
public static List<PreIndel> FindIndelsAndRecordEvidence(BamAlignment bamAlignment, IndelTargetFinder targetFinder,
    Dictionary<string, IndelEvidence> lookup, bool isReputable, string chrom, int minMapQuality, bool stitched = false)
{
    // TODO define whether we want to collect indels from supplementaries. I think we probably do...
    // TODO do we want to collect indels from duplicates?
    // Requiring more than one cigar op was intended as a cheaper pre-filter than checking every op on
    // every read; it makes the important assumption that no reads are full I or full D.
    if (bamAlignment.MapQuality <= minMapQuality || bamAlignment.CigarData.Count <= 1 || !bamAlignment.IsPrimaryAlignment())
    {
        return null;
    }

    var indels = targetFinder.FindIndels(bamAlignment, chrom);
    if (!indels.Any())
    {
        return indels;
    }

    // TODO this doesn't support nm from stitched, which is not in a tag. Need to pass it in!!
    var totalNm = bamAlignment.GetIntTag("NM") ?? 0;
    var isMulti = indels.Count > 1;

    // TODO do we want to collect info here for individual indels if they are only seen in multis?
    // Currently trying to solve this by only collecting for individuals if it seems likely that
    // we're going to see reads that don't span both.
    var collectIndividually = true;
    if (isMulti)
    {
        var firstPosOfVariation = indels[0].ReferencePosition;
        var lastIndel = indels[indels.Count - 1];
        var lastPosOfVariation = lastIndel.Type == AlleleCategory.Deletion
            ? lastIndel.ReferencePosition + 1
            : lastIndel.ReferencePosition + lastIndel.Length;
        int readSpanNeededToCoverBoth = lastPosOfVariation - firstPosOfVariation;
        collectIndividually = readSpanNeededToCoverBoth > 25; // TODO magic number
    }

    if (collectIndividually)
    {
        foreach (var indel in indels)
        {
            var singleKey = indel.ToString(); // TODO less gnarly
            var singleMetrics = IndelMetrics(lookup, singleKey);
            UpdateIndelMetrics(bamAlignment, isReputable, stitched, singleMetrics, indel, totalNm);
        }
    }

    if (isMulti)
    {
        var multiKey = string.Join("|", indels.Select(x => x.ToString())); // TODO less gnarly
        var multiMetrics = IndelMetrics(lookup, multiKey);

        // TODO - are read-level repeats that informative? Because this is kind of a perf burden.
        // Repeat detection was removed for that reason, so the flag is always false.
        const bool isRepeat = false;
        AddReadLevelIndelMetrics(bamAlignment, isReputable, stitched, multiMetrics, isRepeat);
        AddMultiIndelMetrics(multiMetrics, indels, totalNm);
    }

    return indels;
}
/// <summary>
/// Stores a copy of <paramref name="alignment"/> in the slot that matches its flags. For primary,
/// non-supplementary alignments it also maintains <c>FragmentSize</c>, <c>NormalPairOrientation</c>,
/// <c>NumPrimaryReads</c>, <c>MinPosition</c> and <c>MaxPosition</c>.
/// </summary>
/// <param name="alignment">Alignment to copy and store.</param>
/// <param name="readNumber">
/// Which read the alignment belongs to. With <c>ReadNumber.NA</c> a primary alignment fills
/// <c>Read1</c> first and <c>Read2</c> second, while supplementary and secondary alignments are
/// filed in the read 1 lists.
/// </param>
/// <exception cref="InvalidDataException">The primary slot the alignment would occupy is already taken.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="readNumber"/> is not a recognized value while adding a supplementary or secondary alignment.
/// </exception>
public void AddAlignment(BamAlignment alignment, ReadNumber readNumber = ReadNumber.NA)
{
    var copy = new BamAlignment(alignment);
    var isSupplementary = copy.IsSupplementaryAlignment();

    if (!isSupplementary && copy.IsPrimaryAlignment())
    {
        // Fragment size and orientation are only derived while FragmentSize is still 0.
        if (FragmentSize == 0)
        {
            FragmentSize = Math.Abs(copy.FragmentLength);

            // Can be either F1R2 or F2R1: the two mates must be on opposite strands.
            var isReverse = copy.IsReverseStrand();
            var oppositeStrands = isReverse != copy.IsMateReverseStrand();
            NormalPairOrientation = oppositeStrands;

            if (oppositeStrands && copy.RefID == copy.MateRefID)
            {
                // With opposite strands, whichever mate is not reverse is the forward one.
                var forwardMatePosition = isReverse ? copy.MatePosition : copy.Position;
                var reverseMatePosition = isReverse ? copy.Position : copy.MatePosition;
                if (forwardMatePosition > reverseMatePosition)
                {
                    // RF
                    NormalPairOrientation = false;
                }
            }
        }

        NumPrimaryReads++;

        // The comparison reads the copy while the stored value is taken from the caller's instance.
        if (copy.Position > MaxPosition)
        {
            MaxPosition = alignment.Position;
        }
        if (copy.Position < MinPosition)
        {
            MinPosition = alignment.Position;
        }

        switch (readNumber)
        {
            case ReadNumber.NA:
                // Without an explicit read number the first free primary slot is used.
                if (Read1 == null)
                {
                    Read1 = copy;
                }
                else if (Read2 == null)
                {
                    Read2 = copy;
                }
                else
                {
                    throw new InvalidDataException($"Already have both primary alignments for {alignment.Name}.");
                }
                break;

            case ReadNumber.Read1:
                if (Read1 != null)
                {
                    throw new InvalidDataException($"Already have a read 1 primary alignment for {alignment.Name}.");
                }
                Read1 = copy;
                break;

            case ReadNumber.Read2:
                if (Read2 != null)
                {
                    throw new InvalidDataException($"Already have a read 2 primary alignment for {alignment.Name}.");
                }
                Read2 = copy;
                break;

            // Any other value leaves the primary slots untouched.
        }
    }
    else if (isSupplementary)
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                // The list is created on first use.
                (Read1SupplementaryAlignments ?? (Read1SupplementaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            case ReadNumber.Read2:
                (Read2SupplementaryAlignments ?? (Read2SupplementaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }
    else
    {
        switch (readNumber)
        {
            case ReadNumber.Read1:
            case ReadNumber.NA:
                (Read1SecondaryAlignments ?? (Read1SecondaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            case ReadNumber.Read2:
                (Read2SecondaryAlignments ?? (Read2SecondaryAlignments = new List<BamAlignment>())).Add(copy);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(readNumber), readNumber, null);
        }
    }

    // Set as improper once we add any alignment that is flagged as improper.
    if (!copy.IsProperPair())
    {
        IsImproper = true;
    }
}
/// <summary>
/// Bins the fragment identified by alignment. Increases bin count if the first read of a pair passes all the filters.
/// Decreases bin count if the second read of a pair does not pass all the filters.
/// </summary>
/// <param name="alignment">Alignment to consider for binning.</param>
/// <param name="qualityThreshold">minimum mapping quality</param>
/// <param name="readNameToBinIndex">Dictionary of read name to bin index</param>
/// <param name="samePositionReadNames">
/// Names of reads seen so far whose position equals their mate position. A name already present is
/// removed and the alignment is skipped; otherwise the name is added and the alignment proceeds.
/// </param>
/// <param name="usableFragmentCount">number of usable fragments</param>
/// <param name="bins">predefined bins</param>
/// <param name="binIndexStart">bin index from which to start searching for the best bin</param>
public static void BinOneAlignment(BamAlignment alignment, uint qualityThreshold, Dictionary<string, int> readNameToBinIndex,
    HashSet<string> samePositionReadNames, ref long usableFragmentCount, List<GenomicBin> bins, ref int binIndexStart)
{
    // Only mapped, primary reads with a mapped mate in a proper pair are considered.
    if (!alignment.IsMapped()
        || !alignment.IsMateMapped()
        || !alignment.IsPrimaryAlignment()
        || !(alignment.IsPaired() && alignment.IsProperPair()))
    {
        return;
    }

    bool duplicateFailedQCLowQuality = IsDuplicateFailedQCLowQuality(alignment, qualityThreshold);

    // Check whether we have binned the fragment using the mate.
    // A single TryGetValue replaces the former ContainsKey + indexer double lookup.
    int binOfMate;
    if (readNameToBinIndex.TryGetValue(alignment.Name, out binOfMate))
    {
        // Undo binning when one of the reads is a duplicate, fails QC or has low mapping quality
        if (duplicateFailedQCLowQuality)
        {
            usableFragmentCount--;
            bins[binOfMate].Count--;
        }

        readNameToBinIndex.Remove(alignment.Name); // clean up
        return;
    }

    if (duplicateFailedQCLowQuality)
    {
        return;
    }

    if (alignment.RefID != alignment.MateRefID)
    {
        return; // does this ever happen?
    }

    if (IsRightMostInPair(alignment))
    {
        return; // look at only one read of the pair
    }

    // Handle the case where alignment.Position == alignment.MatePosition.
    if (alignment.Position == alignment.MatePosition)
    {
        // Remove reports whether the name was present, so no separate Contains call is needed.
        if (samePositionReadNames.Remove(alignment.Name))
        {
            return;
        }

        samePositionReadNames.Add(alignment.Name);
    }

    if (alignment.FragmentLength == 0)
    {
        return; // Janus-SRS-190: 0 when the information is unavailable
    }

    // Try to bin the fragment
    int fragmentStart = alignment.Position; // 0-based, inclusive
    int fragmentStop = alignment.Position + alignment.FragmentLength; // 0-based, exclusive

    // Skip bins that lie entirely to the left of the fragment.
    while (binIndexStart < bins.Count && bins[binIndexStart].Stop <= fragmentStart)
    {
        binIndexStart++;
    }

    if (binIndexStart >= bins.Count)
    {
        return; // all the remaining fragments are on the right of the last bin
    }

    // now bins[binIndexStart].Stop > fragmentStart
    int bestBinIndex = FindBestBin(bins, binIndexStart, fragmentStart, fragmentStop);
    if (bestBinIndex >= 0) // Bin the fragment
    {
        usableFragmentCount++;
        bins[bestBinIndex].Count++;
        readNameToBinIndex[alignment.Name] = bestBinIndex;
    }
}