/// <summary>
/// Creates a <c>BamReader</c> over <c>_bamFilePath</c>, caches its references ordered by index,
/// refreshes the stitched/collapsed flags from the BAM header and jumps to the chromosome
/// reading should start from.
/// </summary>
/// <param name="chromosomeFilter">
/// Optional reference name to restrict reading to. When null or empty no filter is set and
/// reading starts at the first reference.
/// </param>
/// <exception cref="InvalidDataException">
/// <paramref name="chromosomeFilter"/> was given but is not one of the BAM references.
/// </exception>
private void InitializeReader(string chromosomeFilter = null)
{
    _bamReader = new BamReader(_bamFilePath);
    _references = _bamReader.GetReferences().OrderBy(reference => reference.Index).ToList();

    // The stitched flag is sticky: the header is only consulted while the flag is still false.
    _bamIsStitched = _bamIsStitched || CheckBamHeaderIfBamHasBeenStitched(_bamReader.GetHeader());
    SourceIsCollapsed = CheckIfBamHasBeenCollapsed(_bamReader.GetHeader());

    // No filter requested: start from the first reference (lowest index).
    if (string.IsNullOrEmpty(chromosomeFilter))
    {
        Jump(_references.First().Name);
        return;
    }

    var filteredReference = _references.FirstOrDefault(reference => reference.Name == chromosomeFilter);
    if (filteredReference == null)
    {
        throw new InvalidDataException(string.Format("Cannot set chr filter to '{0}'. This chr is not in the bam.", chromosomeFilter));
    }

    _bamIndexFilter = filteredReference.Index;
    Jump(chromosomeFilter);
}
/// <summary>
/// Creates a <c>BamReader</c> over <c>_bamFilePath</c>, caches its references ordered by index,
/// records whether the header marks the BAM as stitched, and jumps to the position reading
/// should start from.
/// </summary>
/// <param name="chromosomeFilter">
/// Optional reference name to restrict reading to. When null or empty no filter is set and
/// reading starts at the first reference.
/// </param>
/// <exception cref="Exception">
/// <paramref name="chromosomeFilter"/> was given but is not one of the BAM references.
/// </exception>
private void InitializeReader(string chromosomeFilter = null)
{
    _bamReader = new BamReader(_bamFilePath);
    _references = _bamReader.GetReferences().OrderBy(reference => reference.Index).ToList();
    _bamIsStitched = CheckIfBamHasBeenStitched(_bamReader.GetHeader());

    string startChromosome;
    if (string.IsNullOrEmpty(chromosomeFilter))
    {
        // No filter requested: start from the first reference (lowest index).
        startChromosome = _references.First().Name;
    }
    else
    {
        var filteredReference = _references.FirstOrDefault(reference => reference.Name == chromosomeFilter);
        if (filteredReference == null)
        {
            throw new Exception(string.Format("Cannot set chr filter to '{0}'. This chr is not in the bam.", chromosomeFilter));
        }

        _bamIndexFilter = filteredReference.Index;
        startChromosome = chromosomeFilter;
    }

    // When intervals are registered for the start chromosome, begin one base before the
    // first interval's start; otherwise begin at position 0.
    var hasIntervals = _remainingIntervals != null && _remainingIntervals.ContainsKey(startChromosome);
    int startPosition = hasIntervals
        ? _remainingIntervals[startChromosome][0].StartPosition - 1
        : 0;

    Jump(startChromosome, startPosition);
}
/// <summary>
/// Reads the header of the given BAM file and returns it after passing it through
/// <c>UpdateBamHeader</c>.
/// </summary>
/// <param name="inBam">Path of the BAM file whose header is read.</param>
/// <returns>The value produced by <c>UpdateBamHeader</c> for the file's header.</returns>
private string GetHeader(string inBam)
{
    using (var bamReader = new BamReader(inBam))
    {
        return UpdateBamHeader(bamReader.GetHeader());
    }
}
/// <summary>
/// Caches the reference names, header text and reference metadata of <c>_inputBam</c>
/// in <c>_chroms</c>, <c>_header</c> and <c>_references</c>.
/// </summary>
private void Initialize()
{
    // The reader is only needed while the metadata is copied out; dispose it afterwards
    // so the underlying file handle is not left open.
    using (var baseReader = new BamReader(_inputBam))
    {
        _chroms = baseReader.GetReferenceNames();
        _header = baseReader.GetHeader();
        _references = baseReader.GetReferences();
    }
}
/// <summary>
/// Creates a <c>BamWriter</c> for <paramref name="outputFile"/> that uses the references of
/// <c>_inputFile</c> and that file's header after it has been passed through
/// <c>UpdateBamHeader</c>.
/// </summary>
/// <param name="outputFile">Path of the BAM file the returned writer writes to.</param>
/// <returns>A new writer; the caller owns it.</returns>
private BamWriter GetWriter(string outputFile)
{
    using (var inputReader = new BamReader(_inputFile))
    {
        var references = inputReader.GetReferences();
        var header = UpdateBamHeader(inputReader.GetHeader());

        // The writer is constructed while the input reader is still open, as before.
        return new BamWriter(outputFile, header, references);
    }
}
/// <summary>
/// Ensures the directory of <c>_outputFile</c> exists and opens <c>_bamWriter</c> on
/// <c>_temp1File</c>, using the header and references of <c>_inputFile</c>.
/// </summary>
public void Initialize()
{
    var outputDirectory = Path.GetDirectoryName(_outputFile);

    // GetDirectoryName yields "" for a bare file name and null for a root path; both mean
    // there is no directory to create, and CreateDirectory would throw on either value.
    if (!string.IsNullOrEmpty(outputDirectory) && !Directory.Exists(outputDirectory))
    {
        Directory.CreateDirectory(outputDirectory);
    }

    using (var reader = new BamReader(_inputFile))
    {
        var genome = reader.GetReferences();
        _bamWriter = new BamWriter(_temp1File, reader.GetHeader(), genome);
    }
}
/// <summary>
/// Creates the writer for <c>_outBam</c>, using the references of <c>_inBam</c> and that
/// file's header after it has been passed through <c>UpdateBamHeader</c>.
/// </summary>
/// <returns>A <c>BamWriterWrapper</c> around a new <c>BamWriter</c>; the caller owns it.</returns>
private IBamWriter CreateBamWriter()
{
    string bamHeader;
    List<GenomeMetadata.SequenceMetadata> bamReferences;

    // Only the header and references are needed from the input BAM. The previous
    // index-to-name dictionary was populated here but never read, so it has been dropped.
    using (var reader = new BamReader(_inBam))
    {
        bamReferences = reader.GetReferences();
        bamHeader = UpdateBamHeader(reader.GetHeader());
    }

    return new BamWriterWrapper(new BamWriter(_outBam, bamHeader, bamReferences));
}
// Writes a small seed BAM, builds a BamWriterFactory on it, writes through a writer created by
// the factory and checks that the resulting BAM header carries the Gemini program tags.
public void HappyPath()
{
    // TODO figure out how to access the shared bams (e.g. "Chr17Chr19.bam" under
    // TestPaths.SharedBamDirectory) instead of generating a temporary seed file here.
    var seedBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
    if (File.Exists(seedBamPath))
    {
        File.Delete(seedBamPath);
    }

    // Seed BAM: placeholder header, no references, a single alignment.
    var noReferences = new List<GenomeMetadata.SequenceMetadata>();
    using (var seedWriter = new BamWriter(seedBamPath, "header", noReferences))
    {
        var seedAlignment = TestHelpers.CreateBamAlignment("ATCG", 1, 10, 30, true);
        seedWriter.WriteAlignment(seedAlignment);
    }

    var factory = new BamWriterFactory(1, seedBamPath);

    var outputBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
    if (File.Exists(outputBamPath))
    {
        File.Delete(outputBamPath);
    }

    var factoryWriter = factory.CreateSingleBamWriter(outputBamPath);
    var outputAlignment = TestHelpers.CreateBamAlignment("ATCAG", 1, 10, 30, true);
    factoryWriter.WriteAlignment(outputAlignment);
    // NOTE(review): a null alignment presumably tells the writer to flush/finish - confirm.
    factoryWriter.WriteAlignment(null);

    using (var outputReader = new BamReader(outputBamPath))
    {
        // TODO more specific?
        var outputHeader = outputReader.GetHeader();
        Assert.Contains("ID:Gemini", outputHeader);
        Assert.Contains("PN:Gemini", outputHeader);
    }

    File.Delete(seedBamPath);
    File.Delete(outputBamPath);
}
/// <summary>
/// Creates the multithreaded writer for <c>_outBam</c>, using the references of <c>_inBam</c>
/// and that file's header after it has been passed through <c>UpdateBamHeader</c>.
/// </summary>
/// <returns>
/// A <c>BamWriterMultithreaded</c> when <c>_options.SortMemoryGB</c> is zero or negative,
/// otherwise a <c>BamWriterInMem</c> given that memory setting. The caller owns the writer.
/// </returns>
private IBamWriterMultithreaded CreateBamWriter()
{
    string bamHeader;
    List<GenomeMetadata.SequenceMetadata> bamReferences;

    // Only the header and references are needed from the input BAM. The previous
    // index-to-name dictionary was populated here but never read, so it has been dropped.
    using (var reader = new BamReader(_inBam))
    {
        bamReferences = reader.GetReferences();
        bamHeader = UpdateBamHeader(reader.GetHeader());
    }

    if (_options.SortMemoryGB <= 0)
    {
        return new BamWriterMultithreaded(_outBam, bamHeader, bamReferences, _options.NumThreads, 1);
    }

    return new BamWriterInMem(_outBam, bamHeader, bamReferences, _options.SortMemoryGB, _options.NumThreads, 1);
}
/// <summary>
/// Reads the alignments of one chromosome from an indexed BAM file and records the start
/// position of every read that passes the filters in <paramref name="observed"/>.
/// </summary>
/// <param name="bamFile">Path of the BAM file to read; its index is expected at bamFile + ".bai".</param>
/// <param name="isPairedEnd">When true, reads that are not flagged as a proper pair are skipped.</param>
/// <param name="chromosome">Name of the reference sequence whose reads are loaded.</param>
/// <param name="coverageMode">
/// <c>Binary</c> writes 1 into <c>observed.Data</c> at the read position; every other mode calls
/// <c>observed.Set</c>. <c>GCContentWeighted</c> additionally stores the fragment length.
/// </param>
/// <param name="observed">Hit array for the chromosome, indexed by alignment position.</param>
/// <param name="fragmentLengths">
/// Per-position fragment lengths; only written when <paramref name="coverageMode"/> is
/// <c>GCContentWeighted</c>. Values are clamped to the range [0, Int16.MaxValue].
/// </param>
/// <exception cref="Exception">The .bai index file does not exist.</exception>
/// <exception cref="ApplicationException">The chromosome is not a reference of the BAM file.</exception>
static void LoadObservedAlignmentsBAM(string bamFile, bool isPairedEnd, string chromosome, CanvasCoverageMode coverageMode, HitArray observed, Int16[] fragmentLengths)
{
    // Sanity check: The .bai file must exist, in order for us to seek to our target chromosome!
    string indexPath = bamFile + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new Exception(string.Format("Fatal error: Bam index not found at {0}", indexPath));
    }

    using (BamReader reader = new BamReader(bamFile))
    {
        int desiredRefIndex = reader.GetReferenceIndex(chromosome);
        if (desiredRefIndex == -1)
        {
            throw new ApplicationException(
                string.Format("Unable to retrieve the reference sequence index for {0} in {1}.", chromosome, bamFile));
        }

        if (!reader.Jump(desiredRefIndex, 0))
        {
            // Note: This is not necessarily an error, it just means that there *are* no reads for this
            // chromosome in this .bam file. That is not uncommon e.g. for truseq amplicon.
            return;
        }

        int readCount = 0;
        int keptReadCount = 0;
        BamAlignment alignment = new BamAlignment();
        while (reader.GetNextAlignment(ref alignment, true))
        {
            readCount++;

            // Quit as soon as a read belongs to a different reference than the desired one.
            // This is checked before the flag filters so that filtered-out reads of following
            // references (or the unplaced reads with RefID -1) do not keep the loop running.
            // desiredRefIndex is never -1 here, so RefID -1 is covered by this check as well.
            if (alignment.RefID != desiredRefIndex)
            {
                break;
            }

            // Flag check - require reads to be aligned, passing filter, non-duplicate,
            // on the forward strand and a main alignment.
            if (!alignment.IsMapped() ||
                alignment.IsFailedQC() ||
                alignment.IsDuplicate() ||
                alignment.IsReverseStrand() ||
                !alignment.IsMainAlignment())
            {
                continue;
            }

            // Require the alignment to start with 35 bases of non-indel:
            if (alignment.CigarData[0].Type != 'M' || alignment.CigarData[0].Length < 35)
            {
                continue;
            }

            if (isPairedEnd && !alignment.IsProperPair())
            {
                continue;
            }

            keptReadCount++;
            if (coverageMode == CanvasCoverageMode.Binary)
            {
                observed.Data[alignment.Position] = 1;
            }
            else
            {
                observed.Set(alignment.Position);
            }

            // Store fragment size, make sure it's within Int16 range and is positive (simplification for now).
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                fragmentLengths[alignment.Position] =
                    Convert.ToInt16(Math.Max(Math.Min(Int16.MaxValue, alignment.FragmentLength), 0));
            }
        }

        Console.WriteLine("Kept {0} of {1} total reads", keptReadCount, readCount);
    }
}