/// <summary>
/// Second pass: copies every alignment from <paramref name="tmpFile"/> to
/// <paramref name="writer"/>, first updating the mate-related fields of any read whose
/// mate has an entry in the remapping table.
/// </summary>
/// <param name="tmpFile">Path of the BAM file to read alignments from.</param>
/// <param name="writer">Writer that receives every alignment, adjusted or not.</param>
private void AdjustMates(string tmpFile, BamWriter writer)
{
    Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);

    var alignment = new BamAlignment();
    using (var tmpReader = new BamReader(tmpFile))
    {
        while (tmpReader.GetNextAlignment(ref alignment, false))
        {
            // Lookup key is "<name>-2" when this read is the first mate and "<name>-1" otherwise.
            var mateNumber = alignment.IsFirstMate() ? 2 : 1;
            var mateKey = string.Format("{0}-{1}", alignment.Name, mateNumber);

            RemapInfo mateInfo;
            if (_remappings.TryGetValue(mateKey, out mateInfo))
            {
                if (mateInfo.Start != -1)
                {
                    alignment.MatePosition = mateInfo.Start;
                }
                else
                {
                    // A start of -1 is treated as "mate is unmapped".
                    alignment.SetIsMateUnmapped(true);
                    alignment.SetIsProperPair(false);
                    alignment.FragmentLength = 0;
                }

                if (alignment.IsMateMapped() && alignment.IsProperPair())
                {
                    int alignmentEnd = alignment.Position + (int)alignment.CigarData.GetReferenceSpan() - 1;

                    // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                    if (alignment.Position < mateInfo.Start)
                    {
                        alignment.FragmentLength = mateInfo.End - alignment.Position + 1;
                    }
                    else
                    {
                        alignment.FragmentLength = mateInfo.Start - alignmentEnd - 1;
                    }
                }
            }

            // Reads without a remapped mate fall straight through and are written unchanged.
            writer.WriteAlignment(alignment);
        }
    }
}
/// <summary>
/// Opens the BAM file named by the first command-line argument and prints the name and
/// length of the alignment object after each of ten successive reads.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is passed to the reader's <c>Open</c>.</param>
public static void Main(String[] args)
{
    // Report a usage error instead of failing with IndexOutOfRangeException on args[0].
    if (args == null || args.Length == 0)
    {
        Console.Error.WriteLine("Usage: <program> <input.bam>");
        Environment.ExitCode = 1;
        return;
    }

    var aln = new BamAlignment();
    var reader = new BamReader();
    reader.Open(args[0]);

    for (int i = 0; i < 10; i++)
    {
        // NOTE(review): the outcome of GetNextAlignment is not inspected, so a file with fewer
        // than ten records would print whatever aln last held - confirm the return type and check it.
        reader.GetNextAlignment(aln);
        Console.WriteLine("{0} {1}", aln.Name, aln.Length);

        // The first CIGAR element is fetched but never used; kept because the access itself
        // may be what this snippet exercises.
        var foo = aln.CigarData[0];
    }

    // NOTE(review): the reader is never closed or disposed here - confirm whether this
    // BamReader type requires it.
}
/// <summary>
/// Runs one stitching job per entry in <c>_chroms</c>, each writing its own intermediate BAM,
/// then copies the intermediate BAMs in order into a single
/// "&lt;input name&gt;.final.stitched.bam" inside <paramref name="outFolder"/>, deleting each
/// intermediate file once it has been copied.
/// </summary>
/// <param name="inputBam">Path of the BAM to stitch; its file name (without extension) prefixes all outputs.</param>
/// <param name="outFolder">Folder that receives the intermediate and final BAMs.</param>
/// <param name="stitcherOptions">Options handed to every per-chromosome stitcher.</param>
public void Process(string inputBam, string outFolder, StitcherOptions stitcherOptions)
{
    var jobManager = new JobManager(10);
    var stitchJobs = new List<IJob>();
    var chromBamPaths = new List<string>();

    // One job, and one intermediate output file, per chromosome.
    foreach (var chrom in _chroms)
    {
        var chromFileName = Path.GetFileNameWithoutExtension(inputBam) + "." + chrom + ".stitched.bam";
        var chromBamPath = Path.Combine(outFolder, chromFileName);
        chromBamPaths.Add(chromBamPath);

        var chromStitcher = new BamStitcher(inputBam, chromBamPath, stitcherOptions, chrFilter: chrom);
        stitchJobs.Add(new GenericJob(() => chromStitcher.Execute(), "Stitcher_" + chrom));
    }

    jobManager.Process(stitchJobs);

    // Concatenate the per-chromosome outputs, in chromosome order, into the final BAM.
    Logger.WriteToLog("Writing final bam.");
    var finalFileName = Path.GetFileNameWithoutExtension(inputBam) + ".final.stitched.bam";
    var outputBam = Path.Combine(outFolder, finalFileName);

    using (var finalOutput = new BamWriter(outputBam, _header, _references))
    {
        foreach (var chromBamPath in chromBamPaths)
        {
            Logger.WriteToLog($"Adding {chromBamPath} to final bam.");

            var alignment = new BamAlignment();
            using (var chromReader = new BamReader(chromBamPath))
            {
                while (chromReader.GetNextAlignment(ref alignment, false))
                {
                    finalOutput.WriteAlignment(alignment);
                }
            }

            // The intermediate file is no longer needed once its reads are copied.
            File.Delete(chromBamPath);
        }
    }

    Logger.WriteToLog($"Finished combining per-chromosome bams into final bam at {outputBam}");
}
/// <summary>
/// Advances to the next alignment that falls before the end of the current interval (or to
/// the next alignment at all when no interval applies) and loads it into <paramref name="read"/>.
/// </summary>
/// <param name="read">Object that is reset with the reference name and the raw alignment.</param>
/// <returns>
/// <c>true</c> when <paramref name="read"/> was populated. <c>false</c> - after calling
/// <c>Dispose()</c> - when <c>JumpIfNeeded</c> returns false, when the underlying reader has no
/// more records, or when an index filter is set and the record lies on a different reference.
/// </returns>
/// <exception cref="Exception">Thrown when the underlying reader is null.</exception>
public bool GetNextAlignment(Read read)
{
    if (_bamReader == null)
    {
        throw new Exception("Already disposed.");
    }

    for (;;)
    {
        Region activeInterval = null;

        if (_rawAlignment == null)
        {
            // First call: nothing has been read yet, so there is nothing to jump from.
            _rawAlignment = new BamAlignment();
        }
        else
        {
            // Intervals are looked up by the reference of the previously read alignment.
            var chrIntervals = GetIntervalsForChr(_rawAlignment.RefID);

            // A null interval list signals that interval jumping does not apply.
            if (chrIntervals != null && !JumpIfNeeded(chrIntervals, out activeInterval))
            {
                Dispose();
                return false;
            }
        }

        if (!_bamReader.GetNextAlignment(ref _rawAlignment, false) ||
            ((_bamIndexFilter > -1) && (_rawAlignment.RefID != _bamIndexFilter)))
        {
            Dispose();
            return false;
        }

        if (activeInterval != null && !(_rawAlignment.Position < activeInterval.EndPosition))
        {
            // Read lies off the end of the interval: loop again to jump to the next one
            // or scan to the end.
            continue;
        }

        var matchingReference = _references.FirstOrDefault(r => r.Index == _rawAlignment.RefID);
        read.Reset(matchingReference?.Name, _rawAlignment);
        return true;
    }
}
/// <summary>
/// Reads the next raw alignment and loads it into <paramref name="read"/>.
/// </summary>
/// <param name="read">Object that is reset with the reference name, the raw alignment and the stitch setting.</param>
/// <returns>
/// <c>true</c> when <paramref name="read"/> was populated; <c>false</c> - after calling
/// <c>Dispose()</c> - when the underlying reader has no more records, or when an index filter
/// is set and the record lies on a different reference.
/// </returns>
/// <exception cref="Exception">Thrown when the underlying reader is null.</exception>
public bool GetNextAlignment(Read read)
{
    if (_bamReader == null)
    {
        throw new Exception("Already disposed.");
    }

    // Accepted = a record was read AND (no filter is active OR the record is on the filtered reference).
    bool accepted = _bamReader.GetNextAlignment(ref _rawAlignment, false)
                    && !(_bamIndexFilter > -1 && _rawAlignment.RefID != _bamIndexFilter);

    if (accepted)
    {
        var referenceName = _bamReader.GetReferenceNameByID(_rawAlignment.RefID);
        read.Reset(referenceName, _rawAlignment, _stitchReads);
        return true;
    }

    Dispose();
    return false;
}
/// <summary>
/// Runs the chosen stitcher processor on <paramref name="inBam"/> and checks that the
/// produced BAM matches the expected BAM read-for-read (bases, position and qualities),
/// including having the same number of reads.
/// </summary>
/// <param name="inBam">Input BAM handed to the processor.</param>
/// <param name="outBam">Path where the processor's output is expected; deleted up front if present.</param>
/// <param name="expBam">BAM holding the expected alignments.</param>
/// <param name="outFolder">Output folder passed to the processor.</param>
/// <param name="threadbyChr">When true a <c>GenomeProcessor</c> is used, otherwise a <c>BamProcessor</c>.</param>
/// <param name="stitcherOptions">Options passed to the processor.</param>
private static void RunProcessorTest(string inBam, string outBam, string expBam, string outFolder, bool threadbyChr, StitcherOptions stitcherOptions)
{
    if (File.Exists(outBam))
    {
        File.Delete(outBam);
    }

    Logger.OpenLog(TestPaths.LocalScratchDirectory, "StitcherTestLog.txt", true);
    try
    {
        var processor = threadbyChr ? (IStitcherProcessor)new GenomeProcessor(inBam) : new BamProcessor();
        processor.Process(inBam, outFolder, stitcherOptions);
    }
    finally
    {
        // Close the log even when processing throws, so a failure does not leave it open.
        Logger.CloseLog();
    }

    Assert.True(File.Exists(outBam));

    var observedAlignment = new BamAlignment();
    var expectedAlignment = new BamAlignment();

    using (var outReader = new BamReader(outBam))
    using (var expReader = new BamReader(expBam))
    {
        while (true)
        {
            var nextObservation = outReader.GetNextAlignment(ref observedAlignment, true);
            var nextExpected = expReader.GetNextAlignment(ref expectedAlignment, true);

            // Both files must run out of reads on the same iteration; otherwise the output
            // has missing or extra reads and the field comparisons below would not notice.
            Assert.Equal(nextExpected, nextObservation);

            if ((nextExpected == false) || (expectedAlignment == null))
            {
                break;
            }

            Assert.Equal(expectedAlignment.Bases, observedAlignment.Bases);
            Assert.Equal(expectedAlignment.Position, observedAlignment.Position);
            Assert.Equal(expectedAlignment.Qualities, observedAlignment.Qualities);
        }

        outReader.Close();
        expReader.Close();
    }
}
/// <summary>
/// Jumps to the unaligned section of the input file and copies every read whose reference
/// index is -1 from there onwards to <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Writer that receives the unaligned reads.</param>
private void WriteUnalignedReads(BamWriter writer)
{
    Logger.WriteToLog("Writing unaligned reads");

    using (var inputReader = new BamReader(_inputFile))
    {
        inputReader.JumpToUnaligned();

        var alignment = new BamAlignment();
        while (inputReader.GetNextAlignment(ref alignment, false))
        {
            // Anything that still carries a reference index is one of the last aligned reads: skip it.
            if (alignment.RefID == -1)
            {
                writer.WriteAlignment(alignment);
            }
        }
    }
}
/// <summary>
/// Step 2: Walk the reads of the chromosome of interest in the given bam file and hand every
/// read that may overlap an upcoming variant to <c>ProcessReadBases</c>.
/// </summary>
/// <param name="bamPath">Path of the bam file to scan.</param>
/// <exception cref="ArgumentException">Thrown when the chromosome name has no reference index in the bam file.</exception>
protected void ProcessBamFile(string bamPath)
{
    Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);

    int recordsSeen = 0;
    int variantCursor = 0;

    using (BamReader bamReader = new BamReader(bamPath))
    {
        BamAlignment alignment = new BamAlignment();

        int targetRefId = bamReader.GetReferenceIndex(this.Chromosome);
        if (targetRefId < 0)
        {
            throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
        }

        Console.WriteLine("Jump to refid {0} {1}", targetRefId, this.Chromosome);
        bamReader.Jump(targetRefId, 0);

        while (bamReader.GetNextAlignment(ref alignment, false))
        {
            if (!alignment.HasPosition() || alignment.RefID > targetRefId)
            {
                break; // Past the chromosome of interest.
            }

            if (alignment.RefID < targetRefId)
            {
                continue; // Not yet on the chromosome of interest.
            }

            recordsSeen++;
            if (recordsSeen % 1000000 == 0)
            {
                Console.WriteLine("Record {0} at {1}...", recordsSeen, alignment.Position);
            }

            // Only primary, mapped, non-duplicate reads above the mapping-quality floor are counted.
            bool countWorthy = alignment.IsPrimaryAlignment()
                               && alignment.IsMapped()
                               && !alignment.IsDuplicate()
                               && !(alignment.MapQuality <= MinimumMapQ);
            if (!countWorthy)
            {
                continue;
            }

            // Advance the variant cursor past every variant that lies before this read.
            while (variantCursor < this.Variants.Count &&
                   this.Variants[variantCursor].ReferencePosition < alignment.Position)
            {
                variantCursor++;
            }

            if (variantCursor >= this.Variants.Count)
            {
                break; // No variants left to count.
            }

            // Reads starting more than 1000 positions before the next variant are not examined.
            bool tooFarUpstream = alignment.Position + 1000 < this.Variants[variantCursor].ReferencePosition;
            if (!tooFarUpstream)
            {
                // This read potentially overlaps the next variant (and further variants). Count bases!
                ProcessReadBases(alignment, variantCursor);
            }
        }
    }

    Console.WriteLine("Looped over {0} bam records in all", recordsSeen);
}
/// <summary>
/// Bins fragments: reads the alignments of <c>Chromosome</c> from the BAM file in order and
/// passes each one to <c>BinOneAlignment</c>, resetting and then accumulating
/// <c>usableFragmentCount</c>.
/// </summary>
/// <remarks>
/// Returns silently when the reader cannot jump to the chromosome (no reads for it in the BAM);
/// in that case the "no paired alignments" check at the end is not performed.
/// </remarks>
/// <exception cref="Exception">Thrown when the BAM index file does not exist.</exception>
/// <exception cref="Illumina.Common.IlluminaException">
/// Thrown when the chromosome has no reference index in the BAM, when alignments are not
/// sorted by position, or when no paired alignment was seen.
/// </exception>
private void binFragments()
{
    // Sanity check: The BAM index file must exist, in order for us to seek to our target chromosome!
    if (!Bam.Index.Exists)
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", Bam.Index.FullName));
    }

    long pairedAlignmentCount = 0; // keep track of paired alignments
    usableFragmentCount = 0;

    using (BamReader reader = new BamReader(Bam.BamFile.FullName))
    {
        int desiredRefIndex = reader.GetReferenceIndex(Chromosome);
        if (desiredRefIndex == -1)
        {
            throw new Illumina.Common.IlluminaException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", Chromosome, Bam.BamFile.FullName));
        }

        bool result = reader.Jump(desiredRefIndex, 0);
        if (!result)
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file.  That is not uncommon e.g. for truseq amplicon.
            return;
        }

        Dictionary<string, int> readNameToBinIndex = new Dictionary<string, int>();
        HashSet<string> samePositionReadNames = new HashSet<string>();
        int binIndexStart = 0;
        int prevPosition = -1;
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            // Quit once the reads move off the desired reference. desiredRefIndex is known not
            // to be -1 here, so this also stops at reads with no reference (RefID == -1); a
            // separate check for -1 after this point could never fire.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            if (alignment.Position < prevPosition) // Make sure the BAM is properly sorted
            {
                throw new Illumina.Common.IlluminaException(
                    string.Format("The alignment on {0} are not properly sorted in {1}: {2}", Chromosome, Bam.BamFile.FullName, alignment.Name));
            }
            prevPosition = alignment.Position;

            if (alignment.IsPaired())
            {
                pairedAlignmentCount++;
            }

            BinOneAlignment(alignment, FragmentBinnerConstants.MappingQualityThreshold, readNameToBinIndex,
                samePositionReadNames, ref usableFragmentCount, Bins, ref binIndexStart);
        }
    }

    if (pairedAlignmentCount == 0)
    {
        throw new Illumina.Common.IlluminaException(string.Format("No paired alignments found for {0} in {1}", Chromosome, Bam.BamFile.FullName));
    }
}
/// <summary>
/// Reads the alignments of one chromosome from a bam file and records the start position of
/// every read that passes the filters in <paramref name="observed"/>.
/// </summary>
/// <remarks>
/// Returns without touching the outputs when the reader cannot jump to the chromosome
/// (no reads for it in the bam file).
/// </remarks>
/// <param name="bamFile">Bam file to read alignments from; "&lt;bamFile&gt;.bai" must exist.</param>
/// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
/// <param name="chromosome">Name of the chromosome whose reads are loaded.</param>
/// <param name="coverageMode">
/// Binary mode writes 1 into <c>observed.Data</c>; every other mode calls <c>observed.Set</c>.
/// GCContentWeighted mode additionally stores fragment lengths.
/// </param>
/// <param name="observed">Hit array, indexed by alignment position, that receives the observations.</param>
/// <param name="fragmentLengths">
/// Array indexed by alignment position; in GCContentWeighted mode it receives the fragment
/// length clamped to the range [0, Int16.MaxValue].
/// </param>
/// <exception cref="Exception">Thrown when the .bai index file does not exist.</exception>
/// <exception cref="ApplicationException">Thrown when the chromosome has no reference index in the bam file.</exception>
static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
{
    // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
    string indexPath = bamFile + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
    }

    using (BamReader reader = new BamReader(bamFile))
    {
        int desiredRefIndex = reader.GetReferenceIndex(chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome, bamFile));
        }

        bool result = reader.Jump(desiredRefIndex, 0);
        if (!result)
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this chromosome in this
            // .bam file.  That is not uncommon e.g. for truseq amplicon.
            return;
        }

        int readCount = 0;
        int keptReadCount = 0;
        BamAlignment alignment = new BamAlignment();

        while (reader.GetNextAlignment(ref alignment, true))
        {
            // Quit as soon as the reads move off the desired reference. This has to come before
            // the flag filters: otherwise filtered reads beyond the chromosome (including reads
            // with RefID == -1) are skipped with "continue" and the scan runs on through the file.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            readCount++;

            // Flag check - Require reads to be aligned, passing filter, non-duplicate:
            if (!alignment.IsMapped()) { continue; }
            if (alignment.IsFailedQC()) { continue; }
            if (alignment.IsDuplicate()) { continue; }
            if (alignment.IsReverseStrand()) { continue; }
            if (!alignment.IsMainAlignment()) { continue; }

            // Require the alignment to start with 35 bases of non-indel:
            if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
            {
                continue;
            }

            if (isPairedEnd && !alignment.IsProperPair())
            {
                continue;
            }

            keptReadCount++;
            if (coverageMode == CanvasCoverageMode.Binary)
            {
                observed.Data[alignment.Position] = 1;
            }
            else
            {
                observed.Set(alignment.Position);
            }

            // store fragment size, make sure it's within Int16 range and is positive (simplification for now)
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                fragmentLengths[alignment.Position] = Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
            }
        }

        Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
    }
}
/// <summary>
/// Writes four alignments through two handles of a <c>BamWriterMultithreaded</c> into a
/// mocked stream, reads them back with a <c>BamReader</c>, and checks that all four
/// positions are present.
/// </summary>
public void TestMultithreaded()
{
    BamAlignment bamAlignment = new BamAlignment()
    {
        Bases = "ACGT",
        Bin = 0,
        CigarData = new CigarAlignment("4M"),
        Name = "Should have a constructor which initializes the members",
        Position = 1,
        Qualities = new byte[4],
        TagData = new byte[4]
    };

    // Four copies of the template at positions 1, 2, 10 and 11.
    List<BamAlignment> bamAlignments = new List<BamAlignment>();
    bamAlignments.Add(new BamAlignment(bamAlignment));
    bamAlignment.Position = 2;
    bamAlignments.Add(new BamAlignment(bamAlignment));
    bamAlignment.Position = 10;
    bamAlignments.Add(new BamAlignment(bamAlignment));
    bamAlignment.Position = 11;
    bamAlignments.Add(new BamAlignment(bamAlignment));

    // The mocked stream forwards every Write into memoryBuffer so the output can be read back.
    MemoryStream memoryBuffer = new MemoryStream();
    var str = new Mock<MemoryStream>();
    str.Setup(x => x.Write(It.IsAny<byte[]>(), It.IsAny<int>(), It.IsAny<int>()))
        .Callback<byte[], int, int>((buffer, offset, count) => { memoryBuffer.Write(buffer, offset, count); });
    str.SetupGet(x => x.CanWrite).Returns(true);

    using (var bamWriter = new BamWriterMultithreaded(
        str.Object,
        "",
        new System.Collections.Generic.List<GenomeMetadata.SequenceMetadata>(),
        2)) // 2 threads
    {
        var handles = bamWriter.GenerateHandles();

        // Write 2 alignments on the first handle
        // The positions are 1 and 10
        handles[0].WriteAlignment(bamAlignments[0]);
        handles[0].WriteAlignment(bamAlignments[2]);

        // Write 2 alignments on the second handle
        // The positions are 2 and 11
        handles[1].WriteAlignment(bamAlignments[1]);
        handles[1].WriteAlignment(bamAlignments[3]);

        // This will sort and merge the alignments, and write the results to the stream
        bamWriter.Flush();
    }

    memoryBuffer.Position = 0;

    var bamAlignmentsWritten = new List<BamAlignment>();

    // The using block guarantees the reader is disposed even when an assertion below fails.
    using (BamReader bamReader = new BamReader())
    {
        bamReader.Open(memoryBuffer);

        // Verify that all BamAlignment objects are found.
        for (int i = 0; i < 4; ++i)
        {
            BamAlignment al = new BamAlignment();
            Assert.True(bamReader.GetNextAlignment(ref al, false));
            bamAlignmentsWritten.Add(new BamAlignment(al));
        }

        bamReader.Close();
    }

    // NOTE(review): the read-back list is sorted before comparison, so this checks that the
    // right positions were written but not the order they were written in - confirm intent.
    bamAlignmentsWritten.Sort((al1, al2) => (al1.Position.CompareTo(al2.Position)));

    for (int i = 0; i < 4; ++i)
    {
        // Expected value first, actual value second, so failure messages read correctly.
        Assert.Equal(bamAlignments[i].Position, bamAlignmentsWritten[i].Position);
    }
}