public void TestJump()
{
    // Exercises random-access Jump and forward-only JumpForward on the small bwaXC.bam test file.
    string bamPath = Path.Combine(TestPaths.LocalTestDataDirectory, "bwaXC.bam");

    using (BamReader bamReader = new BamReader(bamPath))
    {
        BamAlignment alignment = new BamAlignment();

        // Jump ahead on chr1 and confirm the next read lies past position 18000.
        Assert.True(bamReader.Jump(bamReader.GetReferenceIndex("chr1"), 20200));
        Assert.True(bamReader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position > 18000);

        // A plain Jump may also move backwards: return to the start of chr1.
        Assert.True(bamReader.Jump(bamReader.GetReferenceIndex("chr1"), 200));
        Assert.True(bamReader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position < 250);

        // Forward-only jumping: a target ahead of the current location is a good forward jump.
        Assert.True(bamReader.JumpForward(bamReader.GetReferenceIndex("chr1"), 20200));
        Assert.True(bamReader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position > 18000);

        // A forward jump to an earlier target still reports success, but the reader stays put:
        // Tell() returns the same value before and after the call.
        var offsetBeforeJump = bamReader.Tell();
        Assert.True(bamReader.JumpForward(bamReader.GetReferenceIndex("chr1"), 200));
        Assert.Equal(offsetBeforeJump, bamReader.Tell());
        Assert.True(bamReader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position > 18000);
    }
}
/// <summary>
/// Step 2: Scan the tumor bam file and collect the ref and variant allele counts for the variants
/// of interest on our chromosome.
/// </summary>
/// <param name="bamPath">Path of the bam file to loop over.</param>
/// <exception cref="ArgumentException">
/// Thrown when the reader reports a negative reference index for the chromosome name.
/// </exception>
protected void ProcessBamFile(string bamPath)
{
    Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);

    int recordsSeen = 0;
    int variantCursor = 0;

    using (BamReader bamReader = new BamReader(bamPath))
    {
        BamAlignment alignment = new BamAlignment();

        int targetRefIndex = bamReader.GetReferenceIndex(this.Chromosome);
        if (targetRefIndex < 0)
        {
            string message = string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath);
            throw new ArgumentException(message);
        }

        Console.WriteLine("Jump to refid {0} {1}", targetRefIndex, this.Chromosome);

        // The outcome of the jump is not inspected; reads located before the target
        // chromosome are skipped inside the loop instead.
        bamReader.Jump(targetRefIndex, 0);

        while (bamReader.GetNextAlignment(ref alignment, false))
        {
            // We're past our chromosome of interest (or into reads without a position).
            if (!alignment.HasPosition() || alignment.RefID > targetRefIndex)
            {
                break;
            }

            // We're not yet on our chromosome of interest.
            if (alignment.RefID < targetRefIndex)
            {
                continue;
            }

            recordsSeen++;
            if (recordsSeen % 1000000 == 0)
            {
                Console.WriteLine("Record {0} at {1}...", recordsSeen, alignment.Position);
            }

            // Skip over unaligned or other non-count-worthy reads. The checks short-circuit
            // in this order: secondary, unmapped, duplicate, low mapping quality.
            if (!alignment.IsPrimaryAlignment()
                || !alignment.IsMapped()
                || alignment.IsDuplicate()
                || alignment.MapQuality <= MinimumMapQ)
            {
                continue;
            }

            // Advance through the variant list until it has caught up with this read.
            while (variantCursor < this.Variants.Count
                   && this.Variants[variantCursor].ReferencePosition < alignment.Position)
            {
                variantCursor++;
            }

            // Every variant lies behind the current read: nothing left to count.
            if (variantCursor >= this.Variants.Count)
            {
                break;
            }

            // The read starts more than 1000 bases before the next variant, so it is
            // treated as having no reasonable chance of touching it.
            if (alignment.Position + 1000 < this.Variants[variantCursor].ReferencePosition)
            {
                continue;
            }

            // This read potentially overlaps the next variant (and further variants). Count bases!
            ProcessReadBases(alignment, variantCursor);
        }
    }

    Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
}
/// <summary>
/// Bins fragments: reads the alignments of <c>Chromosome</c> from the BAM file and hands each one to
/// <c>BinOneAlignment</c>, which updates <c>Bins</c> and <c>usableFragmentCount</c>.
/// </summary>
/// <remarks>
/// Returns without binning anything (and without throwing) when the reader cannot jump to the
/// chromosome, i.e. when the BAM file holds no reads for it.
/// </remarks>
/// <exception cref="Exception">Thrown when the BAM index file does not exist.</exception>
/// <exception cref="Illumina.Common.IlluminaException">
/// Thrown when the reference index lookup for the chromosome returns -1, when an alignment starts
/// before its predecessor (BAM not sorted), or when no paired alignment was seen.
/// </exception>
private void binFragments()
{
    // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
    if (!Bam.Index.Exists)
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
    }

    long pairedAlignmentCount = 0; // keep track of paired alignments
    usableFragmentCount = 0;

    using (BamReader reader = new BamReader(Bam.BamFile.FullName))
    {
        int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
        if (desiredRefIndex == -1)
        {
            throw new Illumina.Common.IlluminaException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
        }

        if (!reader.Jump(desiredRefIndex, 0))
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file. That is not uncommon e.g. for truseq amplicon.
            return;
        }

        Dictionary<string, int> readNameToBinIndex = new Dictionary<string, int>();
        HashSet<string> samePositionReadNames = new HashSet<string>();
        int binIndexStart = 0;
        int prevPosition = -1;
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            // Quit at the first alignment that is not on the desired reference. desiredRefIndex is
            // never -1 at this point, so this test also stops on reads with RefID == -1 and no
            // separate check for them is needed.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            // Make sure the BAM is properly sorted.
            if (alignment.Position < prevPosition)
            {
                throw new Illumina.Common.IlluminaException(
                    string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
            }
            prevPosition = alignment.Position;

            if (alignment.IsPaired())
            {
                pairedAlignmentCount++;
            }

            BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
        }
    }

    if (pairedAlignmentCount == 0)
    {
        throw new Illumina.Common.IlluminaException(
            string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
    }
}
/// <summary>
/// Reads the alignments of one chromosome from a bam file and marks, in <paramref name="observed"/>,
/// the start position of every alignment that passes the filters.
/// </summary>
/// <param name="bamFile">Bam file to read alignments from. Its index is expected at this path plus ".bai".</param>
/// <param name="isPairedEnd">When true, alignments that are not flagged as a proper pair are skipped.</param>
/// <param name="chromosome">Name of the reference sequence whose alignments are loaded.</param>
/// <param name="coverageMode">
/// Binary writes 1 into <c>observed.Data</c> at the alignment position; every other mode calls
/// <c>observed.Set</c>. GCContentWeighted additionally records fragment lengths.
/// </param>
/// <param name="observed">Hit array indexed by alignment position; updated in place.</param>
/// <param name="fragmentLengths">
/// Indexed by alignment position; written only in GCContentWeighted mode, with the fragment length
/// clamped to the range 0..Int16.MaxValue.
/// </param>
/// <exception cref="Exception">Thrown when the ".bai" index file does not exist.</exception>
/// <exception cref="ApplicationException">
/// Thrown when the reference index lookup for <paramref name="chromosome"/> returns -1.
/// </exception>
static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
{
    // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
    string indexPath = bamFile + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
    }

    using (BamReader reader = new BamReader(bamFile))
    {
        int desiredRefIndex = reader.GetReferenceIndex(chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome, bamFile));
        }

        if (!reader.Jump(desiredRefIndex, 0))
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file. That is not uncommon e.g. for truseq amplicon.
            return;
        }

        int readCount = 0;
        int keptReadCount = 0;
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            readCount++;

            // Flag check - Require reads to be aligned, passing filter, non-duplicate,
            // on the forward strand and a main alignment:
            if (!alignment.IsMapped()) { continue; }
            if (alignment.IsFailedQC()) { continue; }
            if (alignment.IsDuplicate()) { continue; }
            if (alignment.IsReverseStrand()) { continue; }
            if (!alignment.IsMainAlignment()) { continue; }

            // Require the alignment to start with 35 bases of non-indel:
            if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35) { continue; }

            if (isPairedEnd && !alignment.IsProperPair()) { continue; }

            // Quit at the first kept alignment that is not on the desired reference. desiredRefIndex
            // is never -1 at this point, so this test also rules out reads with RefID == -1.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            keptReadCount++;

            if (coverageMode == CanvasCoverageMode.Binary)
            {
                observed.Data[alignment.Position] = 1;
            }
            else
            {
                observed.Set(alignment.Position);
            }

            // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                fragmentLengths[alignment.Position] =
                    Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
            }
        }

        Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
    }
}
/// <summary>
/// Looks up the reference index for <paramref name="chromosomeName"/>, stores it in
/// <c>_bamIndexFilter</c>, and asks the underlying reader to jump to position 0 of that reference.
/// </summary>
/// <param name="chromosomeName">Name of the chromosome to move to.</param>
public void JumpToChromosome(string chromosomeName)
{
    var referenceIndex = _bamReader.GetReferenceIndex(chromosomeName);
    _bamIndexFilter = referenceIndex;

    // The value returned by Jump is not inspected here.
    _bamReader.Jump(_bamIndexFilter, 0);
}
/// <summary>
/// Jumps the underlying BAM reader to a position on the named chromosome.
/// </summary>
/// <param name="chromosomeName">
/// Name to look up in <c>_references</c>; the first entry whose <c>Name</c> equals it supplies the index.
/// </param>
/// <param name="positionIndex">Position handed to the reader's <c>Jump</c>; defaults to 0.</param>
/// <returns>Whatever the underlying reader's <c>Jump</c> call returns.</returns>
/// <remarks>
/// NOTE(review): assuming <c>First</c> is the standard LINQ operator, an unknown chromosome name
/// makes it throw rather than return false - confirm callers expect that.
/// </remarks>
public bool Jump(string chromosomeName, int positionIndex = 0)
{
    var matchingReference = _references.First(reference => reference.Name == chromosomeName);
    var referenceIndex = matchingReference.Index;
    return _bamReader.Jump(referenceIndex, positionIndex);
}