/// <summary>
/// Opens the BAM file at <c>parameters.bamFile</c> and returns its reference names.
/// </summary>
/// <returns>The list produced by <c>BamReader.GetReferenceNames()</c> for that file.</returns>
private List<string> GetChromosomesInBam()
{
    List<string> referenceNames;

    // The reader is only needed long enough to read the names.
    using (var bamReader = new BamReader(parameters.bamFile))
    {
        referenceNames = bamReader.GetReferenceNames();
    }

    return referenceNames;
}
/// <summary>
/// Opens a reader on <c>_bamFilePath</c>, caches its references ordered by index,
/// refreshes the stitched/collapsed flags from the header, and jumps to the first
/// chromosome to read.
/// </summary>
/// <param name="chromosomeFilter">
/// Optional chromosome name. When non-empty, reading is restricted to that chromosome's
/// index and starts there; otherwise reading starts at the first reference.
/// </param>
/// <exception cref="InvalidDataException">The requested chromosome is not among the BAM references.</exception>
private void InitializeReader(string chromosomeFilter = null)
{
    _bamReader = new BamReader(_bamFilePath);
    _references = _bamReader.GetReferences().OrderBy(r => r.Index).ToList();

    // Once the stitched flag is set it is not re-derived from the header.
    if (!_bamIsStitched)
    {
        _bamIsStitched = CheckBamHeaderIfBamHasBeenStitched(_bamReader.GetHeader());
    }

    SourceIsCollapsed = CheckIfBamHasBeenCollapsed(_bamReader.GetHeader());

    // No filter: start from the lowest-index reference.
    if (string.IsNullOrEmpty(chromosomeFilter))
    {
        Jump(_references.First().Name);
        return;
    }

    var matchingReference = _references.FirstOrDefault(r => r.Name == chromosomeFilter);
    if (matchingReference == null)
    {
        throw new InvalidDataException(string.Format("Cannot set chr filter to '{0}'. This chr is not in the bam.", chromosomeFilter));
    }

    _bamIndexFilter = matchingReference.Index;
    Jump(chromosomeFilter);
}
/// <summary>
/// Builds a <c>Genome</c> for <paramref name="genomePath"/> from the chromosomes found in
/// the BAM files of every work request that targets that genome directory.
/// </summary>
/// <param name="genomePath">Genome directory; matched against each work request's
/// <c>GenomeDirectory</c> ignoring case (invariant culture).</param>
/// <returns>A genome restricted to the distinct chromosome names collected from the BAMs.</returns>
public Genome GetReferenceGenome(string genomePath)
{
    var chromosomeNames = new List<string>();

    var bamWorkRequests = WorkRequests
        .Where(w => w.GenomeDirectory.Equals(genomePath, StringComparison.InvariantCultureIgnoreCase))
        .ToList();

    foreach (var workRequest in bamWorkRequests)
    {
        var bamFilePath = workRequest.BamFilePath;

        List<string> bamChromosomes;
        using (var reader = new BamReader(bamFilePath))
        {
            bamChromosomes = reader.GetReferenceNames();
        }

        // Load intervals and filter chromosomes if necessary. A single TryGetValue
        // replaces the previous ContainsKey + indexer double lookup.
        if (_bamIntervalLookup.TryGetValue(bamFilePath, out var bamIntervals) && bamIntervals != null)
        {
            chromosomeNames.AddRange(bamChromosomes.Where(bamIntervals.ContainsKey));
        }
        else
        {
            chromosomeNames.AddRange(bamChromosomes);
        }
    }

    return new Genome(genomePath, chromosomeNames.Distinct().ToList());
}
/// <summary>
/// Reads the read-group sample from the BAM file referenced by <paramref name="bam"/>.
/// </summary>
/// <param name="bam">Object whose <c>BamFile</c> is opened with a <c>BamReader</c>.</param>
/// <returns>The value returned by <c>BamReader.GetReadGroupSample()</c>.</returns>
private static string GetReadGroupSample(Bam bam)
{
    string sample;

    using (var bamReader = new BamReader(bam.BamFile))
    {
        sample = bamReader.GetReadGroupSample();
    }

    return sample;
}
//wrapper should now handle all throwing and catching..
/// <summary>
/// Builds the sub-process command line (minus the options that must not be forwarded),
/// maps each selected reference name of the input BAM to its reference index, and hands
/// both to a <c>GeminiMultiProcessor</c> for execution.
/// </summary>
protected override void ProgramExecution()
{
    var optionsUsed = _appOptionParser.ParsingResult.OptionsUsed;
    var doNotPassToSubprocess = new List<string>()
    {
        "outFolder", "numProcesses", "exePath", "intermediateDir", "multiprocess", "chromosomes"
    };

    var cmdLineList = MultiProcessHelpers.GetCommandLineWithoutIgnoredArguments(optionsUsed, doNotPassToSubprocess);

    // Materialise the chromosome filter once instead of copying it to a new list
    // (and scanning it linearly) for every reference name. Null means "no filter".
    var requestedChromosomes = _options.Chromosomes == null
        ? null
        : new HashSet<string>(_options.Chromosomes);

    var refNameMapping = new Dictionary<string, int>();
    using (var bamReader = new BamReader(_options.InputBam))
    {
        foreach (var referenceName in bamReader.GetReferenceNames())
        {
            if (requestedChromosomes != null && !requestedChromosomes.Contains(referenceName))
            {
                continue;
            }

            refNameMapping.Add(referenceName, bamReader.GetReferenceIndex(referenceName));
        }
    }

    var taskManager = new CliTaskManager(_options.NumProcesses);
    var geminiProcessor = new GeminiMultiProcessor(_options, new CliTaskCreator());
    var samtoolsWrapper = new SamtoolsWrapper(_options.GeminiOptions.SamtoolsPath, _options.GeminiOptions.IsWeirdSamtools);

    geminiProcessor.Execute(taskManager, refNameMapping, cmdLineList, samtoolsWrapper);
}
/// <summary>
/// Opens a reader on <c>_bamFilePath</c>, caches its references ordered by index,
/// records whether the header marks the BAM as stitched, and jumps to the first
/// position to read.
/// </summary>
/// <param name="chromosomeFilter">
/// Optional chromosome name. When non-empty, reading is restricted to that chromosome's
/// index and starts there; otherwise reading starts at the first reference.
/// </param>
/// <exception cref="Exception">The requested chromosome is not among the BAM references.</exception>
private void InitializeReader(string chromosomeFilter = null)
{
    _bamReader = new BamReader(_bamFilePath);
    _references = _bamReader.GetReferences().OrderBy(r => r.Index).ToList();
    _bamIsStitched = CheckIfBamHasBeenStitched(_bamReader.GetHeader());

    string startChromosome;
    if (string.IsNullOrEmpty(chromosomeFilter))
    {
        startChromosome = _references.First().Name;
    }
    else
    {
        var matchingReference = _references.FirstOrDefault(r => r.Name == chromosomeFilter);
        if (matchingReference == null)
        {
            throw new Exception(string.Format("Cannot set chr filter to '{0}'. This chr is not in the bam.", chromosomeFilter));
        }

        _bamIndexFilter = matchingReference.Index;
        startChromosome = chromosomeFilter;
    }

    // Start just before the first remaining interval on the start chromosome, if there is one.
    var startPosition = 0;
    var hasIntervals = _remainingIntervals != null && _remainingIntervals.ContainsKey(startChromosome);
    if (hasIntervals)
    {
        startPosition = _remainingIntervals[startChromosome][0].StartPosition - 1;
    }

    Jump(startChromosome, startPosition);
}
/// <summary>
/// Exercises <c>Jump</c> and <c>JumpForward</c> on the small "bwaXC.bam" test file:
/// jumps forward and backward on chr1, then checks that a forward-only jump to an
/// earlier position leaves the reader where it was.
/// </summary>
public void TestJump()
{
    var bamPath = Path.Combine(TestPaths.LocalTestDataDirectory, "bwaXC.bam");

    using (var reader = new BamReader(bamPath))
    {
        var alignment = new BamAlignment();
        var chr1 = reader.GetReferenceIndex("chr1");

        // Plain jump to a late position.
        Assert.True(reader.Jump(chr1, 20200));
        Assert.True(reader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position > 18000);

        // Plain jump back to an early position.
        Assert.True(reader.Jump(chr1, 200));
        Assert.True(reader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position < 250);

        // now, forward-only jumping
        Assert.True(reader.JumpForward(chr1, 20200));
        Assert.True(reader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position > 18000); // a good forward jump

        // A forward-only jump to an earlier position must not move the reader.
        var offsetBefore = reader.Tell();
        Assert.True(reader.JumpForward(chr1, 200));
        Assert.Equal(offsetBefore, reader.Tell()); // we stayed put
        Assert.True(reader.GetNextAlignment(ref alignment, true));
        Assert.True(alignment.Position > 18000);
    }
}
/// <summary>
/// Caches the reference names, header and reference metadata of <c>_inputBam</c>
/// in <c>_chroms</c>, <c>_header</c> and <c>_references</c>.
/// </summary>
private void Initialize()
{
    // Dispose the reader once the metadata has been copied out; previously it was
    // left open for the garbage collector to clean up.
    using (var baseReader = new BamReader(_inputBam))
    {
        _chroms = baseReader.GetReferenceNames();
        _header = baseReader.GetHeader();
        _references = baseReader.GetReferences();
    }
}
/// <summary>
/// Reads the header of <paramref name="inBam"/> and returns it after passing it
/// through <c>UpdateBamHeader</c>.
/// </summary>
/// <param name="inBam">Path of the BAM file whose header is read.</param>
/// <returns>The result of <c>UpdateBamHeader</c> applied to the BAM's header.</returns>
private string GetHeader(string inBam)
{
    string updatedHeader;

    using (var bamReader = new BamReader(inBam))
    {
        var originalHeader = bamReader.GetHeader();
        updatedHeader = UpdateBamHeader(originalHeader);
    }

    return updatedHeader;
}
/// <summary>
/// Builds a lookup from each BAM's read-group sample name to the BAM location.
/// </summary>
/// <param name="bams">BAM locations to inspect; each is opened through <c>BamReader.WrapException</c>.</param>
/// <returns>A dictionary keyed by the value of <c>GetReadGroupSample()</c> for each BAM.</returns>
private Dictionary<string, IFileLocation> MapSampleNameToBam(List<IFileLocation> bams)
{
    var sampleToBam = new Dictionary<string, IFileLocation>();

    // Dictionary.Add is used deliberately: two BAMs with the same sample name throw.
    bams.ForEach(bam =>
        BamReader.WrapException(bam, reader => sampleToBam.Add(reader.GetReadGroupSample(), bam)));

    return sampleToBam;
}
/// <summary>
/// Creates a <c>BamWriter</c> for <paramref name="outputFile"/> that reuses the references
/// of <c>_inputFile</c> and a header derived from it via <c>UpdateBamHeader</c>.
/// </summary>
/// <param name="outputFile">Path passed to the <c>BamWriter</c> constructor.</param>
/// <returns>The newly constructed writer; the caller owns it.</returns>
private BamWriter GetWriter(string outputFile)
{
    using (var inputReader = new BamReader(_inputFile))
    {
        var references = inputReader.GetReferences();
        var header = UpdateBamHeader(inputReader.GetHeader());

        // The writer is constructed while the input reader is still open, as before.
        var writer = new BamWriter(outputFile, header, references);
        return writer;
    }
}
/// <summary>
/// Creates a <c>Genome</c> for <c>options.GenomePath</c> restricted to the reference
/// names found in the BAM at <c>options.BamPath</c>.
/// </summary>
/// <param name="options">Supplies the BAM path and the genome path.</param>
/// <returns>The constructed genome.</returns>
public Genome SetGenome(ScyllaApplicationOptions options)
{
    List<string> referenceNames;

    using (var bamReader = new BamReader(options.BamPath))
    {
        referenceNames = bamReader.GetReferenceNames();
    }

    return new Genome(options.GenomePath, referenceNames);
}
/// <summary>
/// Second pass over the temporary BAM: copies every read to <paramref name="writer"/>,
/// first updating mate position, mate/pair flags and fragment length for reads whose
/// mate has an entry in <c>_remappings</c>.
/// </summary>
/// <param name="tmpFile">Path of the BAM file to re-read.</param>
/// <param name="writer">Destination for the (possibly adjusted) reads.</param>
private void AdjustMates(string tmpFile, BamWriter writer)
{
    // Second pass: Adjust flags on mates
    Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);

    var read = new BamAlignment();
    using (var reader = new BamReader(tmpFile))
    {
        while (reader.GetNextAlignment(ref read, false))
        {
            // The mate's key carries the opposite mate number from this read.
            var mateNumber = read.IsFirstMate() ? 2 : 1;
            var mateKey = string.Format("{0}-{1}", read.Name, mateNumber);

            RemapInfo info;
            if (_remappings.TryGetValue(mateKey, out info))
            {
                if (info.Start == -1)
                {
                    // A start of -1 is treated as "mate is unmapped".
                    read.SetIsMateUnmapped(true);
                    read.SetIsProperPair(false);
                    read.FragmentLength = 0;
                }
                else
                {
                    read.MatePosition = info.Start;
                }

                if (read.IsMateMapped() && read.IsProperPair())
                {
                    int readEnd = read.Position + (int)read.CigarData.GetReferenceSpan() - 1;

                    // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                    if (read.Position < info.Start)
                    {
                        read.FragmentLength = info.End - read.Position + 1;
                    }
                    else
                    {
                        read.FragmentLength = info.Start - readEnd - 1;
                    }
                }
            }

            // Reads without a remapped mate are written unchanged.
            writer.WriteAlignment(read);
        }
    }
}
/// <summary>
/// Disposes the underlying <c>_bamReader</c>, if any, and clears the field.
/// Any exception raised while disposing is deliberately ignored.
/// </summary>
public void Dispose()
{
    if (_bamReader == null)
    {
        return;
    }

    try
    {
        _bamReader.Dispose();
        _bamReader = null;
    }
    catch (Exception)
    {
        // swallow it
    }
}
/// <summary>
/// Opens the BAM file named by the first command-line argument and, ten times in a row,
/// fetches the next alignment and prints its name and length.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the BAM file path.</param>
public static void Main(String[] args)
{
    // Previously a missing argument surfaced as an IndexOutOfRangeException.
    if (args == null || args.Length == 0)
    {
        Console.Error.WriteLine("Usage: <program> <bam-file>");
        return;
    }

    var aln = new BamAlignment();
    var reader = new BamReader();
    reader.Open(args[0]);

    for (int i = 0; i < 10; i++)
    {
        reader.GetNextAlignment(aln);
        Console.WriteLine("{0} {1}", aln.Name, aln.Length);

        // Kept from the original: the value is unused, but the first CIGAR element is still read.
        var foo = aln.CigarData[0];
    }
}
/// <summary>
/// Stitches <paramref name="inputBam"/> one chromosome at a time (one job per entry in
/// <c>_chroms</c>), then concatenates the per-chromosome BAMs into a single
/// "*.final.stitched.bam" in <paramref name="outFolder"/>, deleting each intermediate
/// file once it has been copied.
/// </summary>
/// <param name="inputBam">Path of the BAM to stitch.</param>
/// <param name="outFolder">Folder that receives intermediate and final BAMs.</param>
/// <param name="stitcherOptions">Options forwarded to every <c>BamStitcher</c>.</param>
public void Process(string inputBam, string outFolder, StitcherOptions stitcherOptions)
{
    var jobManager = new JobManager(10);
    var jobs = new List<IJob>();
    var perChromBams = new List<string>();
    var bamStem = Path.GetFileNameWithoutExtension(inputBam);

    // Process each of the chromosomes separately
    foreach (var chrom in _chroms)
    {
        var chromOutput = Path.Combine(outFolder, bamStem + "." + chrom + ".stitched.bam");
        perChromBams.Add(chromOutput);

        var stitcher = new BamStitcher(inputBam, chromOutput, stitcherOptions, chrFilter: chrom);
        jobs.Add(new GenericJob(() => stitcher.Execute(), "Stitcher_" + chrom));
    }

    jobManager.Process(jobs);

    // Combine the per-chromosome bams
    Logger.WriteToLog("Writing final bam.");
    var outputBam = Path.Combine(outFolder, bamStem + ".final.stitched.bam");

    using (var finalOutput = new BamWriter(outputBam, _header, _references))
    {
        foreach (var chromBam in perChromBams)
        {
            Logger.WriteToLog("Adding " + chromBam + " to final bam.");

            var alignment = new BamAlignment();
            using (var chromReader = new BamReader(chromBam))
            {
                while (chromReader.GetNextAlignment(ref alignment, false))
                {
                    finalOutput.WriteAlignment(alignment);
                }
            }

            // The intermediate file is no longer needed once its reads are copied.
            File.Delete(chromBam);
        }
    }

    Logger.WriteToLog("Finished combining per-chromosome bams into final bam at " + outputBam);
}
/// <summary>
/// Ensures the directory of <c>_outputFile</c> exists and creates <c>_bamWriter</c> on
/// <c>_temp1File</c>, reusing the header and references of <c>_inputFile</c>.
/// </summary>
public void Initialize()
{
    // GetDirectoryName yields null/empty when the output path has no directory part
    // (e.g. a bare file name); CreateDirectory("") would throw, so skip it in that case.
    // CreateDirectory is a no-op when the directory already exists.
    var outputDirectory = Path.GetDirectoryName(_outputFile);
    if (!string.IsNullOrEmpty(outputDirectory))
    {
        Directory.CreateDirectory(outputDirectory);
    }

    using (var reader = new BamReader(_inputFile))
    {
        var genome = reader.GetReferences();
        _bamWriter = new BamWriter(_temp1File, reader.GetHeader(), genome);
    }
}
/// <summary>
/// Reads the reference sequence metadata from the BAM file at <paramref name="inBam"/>.
/// </summary>
/// <param name="inBam">Path of the BAM file to open.</param>
/// <returns>The list returned by <c>BamReader.GetReferences()</c>.</returns>
private List<GenomeMetadata.SequenceMetadata> GetReferences(string inBam)
{
    // The former index-to-name dictionary was built here but never read, so it is gone.
    using (var reader = new BamReader(inBam))
    {
        return reader.GetReferences();
    }
}
/// <summary>
/// Creates a <c>PairHandler</c> backed by a <c>BasicStitcher</c> configured from
/// <c>_options</c>, together with a reference-index-to-name map read from <c>_inBam</c>.
/// </summary>
/// <param name="readStatuses">Counter passed through to the pair handler.</param>
/// <returns>The configured pair handler.</returns>
private IReadPairHandler CreatePairHandler(ReadStatusCounter readStatuses)
{
    var stitcher = new BasicStitcher(
        _options.MinBaseCallQuality,
        useSoftclippedBases: _options.UseSoftClippedBases,
        nifyDisagreements: _options.NifyDisagreements,
        debug: _options.Debug,
        nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
        ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
        maxReadLength: _options.MaxReadLength);

    // Map each reference index in the input BAM to its name.
    var refIdMapping = new Dictionary<int, string>();
    using (var bamReader = new BamReader(_inBam))
    {
        foreach (var name in bamReader.GetReferenceNames())
        {
            var index = bamReader.GetReferenceIndex(name);
            refIdMapping.Add(index, name);
        }
    }

    var handler = new PairHandler(refIdMapping, stitcher, _options.FilterUnstitchablePairs, readStatuses);
    return handler;
}
/// <summary>
/// Disposes the underlying <c>_bamReader</c>, if any, and clears the field.
/// A failure while disposing is logged rather than propagated.
/// </summary>
public void Dispose()
{
    if (_bamReader == null)
    {
        return;
    }

    try
    {
        _bamReader.Dispose();
        _bamReader = null;
    }
    catch (Exception ex)
    {
        // Do not rethrow from Dispose; record the failure in the log instead.
        Logger.WriteExceptionToLog(new Exception("Error disposing BamReader: " + ex.Message, ex));
    }
}
/// <summary>
/// Builds a <c>Genome</c> for <paramref name="genomePath"/> from the chromosomes of every
/// BAM whose work request targets that genome directory, after applying
/// <c>FilterBamChromosomes</c> and the optional <c>_baseOptions.ChromosomeFilter</c>.
/// Logs a warning when the genome ends up with fewer chromosomes to process than requested.
/// </summary>
/// <param name="genomePath">Genome directory; matched against each work request's
/// <c>GenomeDirectory</c> ignoring case (current culture).</param>
/// <returns>The constructed genome.</returns>
public Genome GetReferenceGenome(string genomePath)
{
    var chromosomeNames = new List<string>();

    var bamWorkRequests = WorkRequests
        .Where(w => w.GenomeDirectory.Equals(genomePath, StringComparison.CurrentCultureIgnoreCase))
        .ToList();

    foreach (var workRequest in bamWorkRequests)
    {
        var bamFilePath = workRequest.BamFilePath;

        List<string> bamChromosomes;
        using (var reader = new BamReader(bamFilePath))
        {
            bamChromosomes = reader.GetReferenceNames();
        }

        var filteredChromosomes = FilterBamChromosomes(bamChromosomes, bamFilePath);

        if (!string.IsNullOrEmpty(_baseOptions.ChromosomeFilter))
        {
            filteredChromosomes.RemoveAll(c => c != _baseOptions.ChromosomeFilter);
        }

        chromosomeNames.AddRange(filteredChromosomes);
    }

    // Compute the distinct request list once; it was previously recomputed for the
    // constructor, the comparison and the log message.
    var requestedChromosomes = chromosomeNames.Distinct().ToList();

    // Genome gets its own copy so the list used for the checks below cannot be affected by it.
    var genome = new Genome(genomePath, new List<string>(requestedChromosomes));

    if (genome.ChromosomesToProcess.Count() < requestedChromosomes.Count)
    {
        Logger.WriteToLog("Warning: Not all requested sequences were found in {0} to process.", genome.GetGenomeBuild());
        Logger.WriteToLog("Check BAM file matches reference genome.");

        if (string.IsNullOrEmpty(_baseOptions.ChromosomeFilter))
        {
            Logger.WriteToLog("Requested sequences: {0}", string.Join(",", requestedChromosomes));
        }
        else
        {
            Logger.WriteToLog("Requested sequences: {0}", _baseOptions.ChromosomeFilter);
        }
    }

    return genome;
}
/// <summary>
/// Builds the sample options for one BAM. When <paramref name="sampleName"/> is null,
/// the name is read from the BAM's read-group sample; a failure while reading it is
/// returned as a failed parsing result carrying the exception message.
/// </summary>
/// <param name="bam">BAM location for the sample.</param>
/// <param name="sampleType">Sample type stored in the options.</param>
/// <param name="sampleName">Explicit sample name, or null to read it from the BAM.</param>
/// <returns>A successful result wrapping the options, or a failed result.</returns>
private static IParsingResult<SmallPedigreeSampleOptions> Parse(IFileLocation bam, SampleType sampleType, string sampleName)
{
    if (sampleName == null)
    {
        Action readSampleNameFromBam = () =>
        {
            BamReader.WrapException(bam, reader => { sampleName = reader.GetReadGroupSample(); });
        };

        Exception failure;
        if (!readSampleNameFromBam.Try(out failure))
        {
            return ParsingResult<SmallPedigreeSampleOptions>.FailedResult(failure.Message);
        }
    }

    var sampleOptions = new SmallPedigreeSampleOptions(sampleName, sampleType, bam);
    return ParsingResult<SmallPedigreeSampleOptions>.SuccessfulResult(sampleOptions);
}
/// <summary>
/// Runs a stitcher processor over <paramref name="inBam"/> and checks that the produced
/// <paramref name="outBam"/> matches <paramref name="expBam"/> read by read on bases,
/// position and qualities, for as many reads as the expected BAM contains.
/// </summary>
/// <param name="inBam">Input BAM handed to the processor.</param>
/// <param name="outBam">BAM the processor is expected to produce; deleted first if present.</param>
/// <param name="expBam">BAM holding the expected alignments.</param>
/// <param name="outFolder">Output folder handed to the processor.</param>
/// <param name="threadbyChr">True to use <c>GenomeProcessor</c>, false to use <c>BamProcessor</c>.</param>
/// <param name="stitcherOptions">Options handed to the processor.</param>
private static void RunProcessorTest(string inBam, string outBam, string expBam, string outFolder, bool threadbyChr, StitcherOptions stitcherOptions)
{
    if (File.Exists(outBam))
    {
        File.Delete(outBam);
    }

    Logger.OpenLog(TestPaths.LocalScratchDirectory, "StitcherTestLog.txt", true);
    try
    {
        var processor = threadbyChr ? (IStitcherProcessor)new GenomeProcessor(inBam) : new BamProcessor();
        processor.Process(inBam, outFolder, stitcherOptions);
    }
    finally
    {
        // Close the log even when processing throws, so the log is not left open.
        Logger.CloseLog();
    }

    Assert.True(File.Exists(outBam));

    var observedAlignment = new BamAlignment();
    var expectedAlignment = new BamAlignment();

    using (var outReader = new BamReader(outBam))
    using (var expReader = new BamReader(expBam))
    {
        while (true)
        {
            var nextObservation = outReader.GetNextAlignment(ref observedAlignment, true);
            var nextExpected = expReader.GetNextAlignment(ref expectedAlignment, true);

            if ((nextExpected == false) || (expectedAlignment == null))
            {
                break;
            }

            // Without this check a truncated output would be compared against the stale
            // alignment left over from the previous iteration.
            Assert.True(nextObservation, "Output bam has fewer alignments than the expected bam.");

            Assert.Equal(expectedAlignment.Bases, observedAlignment.Bases);
            Assert.Equal(expectedAlignment.Position, observedAlignment.Position);
            Assert.Equal(expectedAlignment.Qualities, observedAlignment.Qualities);
        }

        outReader.Close();
        expReader.Close();
    }
}
/// <summary>
/// Creates a writer for <c>_outBam</c> that reuses the references of <c>_inBam</c> and a
/// header derived from it via <c>UpdateBamHeader</c>.
/// </summary>
/// <returns>A <c>BamWriterWrapper</c> around the new <c>BamWriter</c>.</returns>
private IBamWriter CreateBamWriter()
{
    string bamHeader;
    List<GenomeMetadata.SequenceMetadata> bamReferences;

    // The former index-to-name dictionary was built here but never read, so it is gone.
    using (var reader = new BamReader(_inBam))
    {
        bamReferences = reader.GetReferences();
        bamHeader = UpdateBamHeader(reader.GetHeader());
    }

    return new BamWriterWrapper(new BamWriter(_outBam, bamHeader, bamReferences));
}
/// <summary>
/// Writes a one-read seed BAM, asks a <c>BamWriterFactory</c> built on it for a single
/// writer, writes through that writer, and checks that the resulting header contains the
/// Gemini program entries ("ID:Gemini" and "PN:Gemini").
/// </summary>
public void HappyPath()
{
    // TODO figure out how to access the shared bams, e.g.:
    //   var bamFilePath = Path.Combine(TestPaths.SharedBamDirectory, "Chr17Chr19.bam");
    //   Assert.True(File.Exists(bamFilePath));

    var seedBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
    if (File.Exists(seedBamPath))
    {
        File.Delete(seedBamPath);
    }

    var noReferences = new List<GenomeMetadata.SequenceMetadata>();
    using (var seedWriter = new BamWriter(seedBamPath, "header", noReferences))
    {
        seedWriter.WriteAlignment(TestHelpers.CreateBamAlignment("ATCG", 1, 10, 30, true));
    }

    var bamWriterFactory = new BamWriterFactory(1, seedBamPath);

    var outputBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
    if (File.Exists(outputBamPath))
    {
        File.Delete(outputBamPath);
    }

    var bamWriterHandle = bamWriterFactory.CreateSingleBamWriter(outputBamPath);
    bamWriterHandle.WriteAlignment(TestHelpers.CreateBamAlignment("ATCAG", 1, 10, 30, true));
    bamWriterHandle.WriteAlignment(null);

    using (var outputReader = new BamReader(outputBamPath))
    {
        // TODO more specific?
        var outputHeader = outputReader.GetHeader();
        Assert.Contains("ID:Gemini", outputHeader);
        Assert.Contains("PN:Gemini", outputHeader);
    }

    File.Delete(seedBamPath);
    File.Delete(outputBamPath);
}
/// <summary>
/// Opens a reader on <paramref name="bamFilePath"/> after verifying that both the BAM and
/// its ".bai" companion exist, and optionally jumps to a chromosome.
/// </summary>
/// <param name="bamFilePath">Path of the BAM file; "<paramref name="bamFilePath"/>.bai" must also exist.</param>
/// <param name="stitchReads">Stored in <c>_stitchReads</c>.</param>
/// <param name="chromosomeFilter">Optional chromosome to jump to; stored in <c>ChromosomeFilter</c>.</param>
/// <exception cref="ArgumentException">The BAM file or its index file does not exist.</exception>
public BamFileAlignmentExtractor(string bamFilePath, bool stitchReads, string chromosomeFilter = null)
{
    _stitchReads = stitchReads;

    var bamExists = File.Exists(bamFilePath);
    if (!bamExists)
    {
        throw new ArgumentException(string.Format("Bam file '{0}' does not exist.", bamFilePath));
    }

    var indexPath = bamFilePath + ".bai";
    if (!File.Exists(indexPath))
    {
        throw new ArgumentException(string.Format("Bai file '{0}.bai' does not exist.", bamFilePath));
    }

    _bamReader = new BamReader(bamFilePath);
    ChromosomeFilter = chromosomeFilter;

    if (string.IsNullOrEmpty(ChromosomeFilter))
    {
        return;
    }

    JumpToChromosome(ChromosomeFilter);
}
/// <summary>
/// Jumps to the unaligned reads of the input file and copies every subsequent read
/// whose <c>RefID</c> is -1 to <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Destination for the unaligned reads.</param>
private void WriteUnalignedReads(BamWriter writer)
{
    Logger.WriteToLog("Writing unaligned reads");

    using (var inputReader = new BamReader(_inputFile))
    {
        inputReader.JumpToUnaligned();

        var read = new BamAlignment();
        while (inputReader.GetNextAlignment(ref read, false))
        {
            // Reads that still carry a reference id are skipped (skip over last reads).
            if (read.RefID == -1)
            {
                writer.WriteAlignment(read);
            }
        }
    }
}
/// <summary>
/// Creates one <c>PairHandler</c> per thread. Each handler gets its own
/// <c>BasicStitcher</c> configured from <c>_options</c>; all handlers share one
/// reference-index-to-name map read from <c>_inBam</c>.
/// </summary>
/// <param name="readStatuses">Counter passed through to every handler.</param>
/// <param name="numThreads">Number of handlers to create.</param>
/// <returns>The list of handlers, one per thread.</returns>
private List<IReadPairHandler> CreatePairHandlers(ReadStatusCounter readStatuses, int numThreads)
{
    var handlers = new List<IReadPairHandler>(numThreads);

    // Map each reference index in the input BAM to its name.
    var refIdMapping = new Dictionary<int, string>();
    using (var bamReader = new BamReader(_inBam))
    {
        foreach (var name in bamReader.GetReferenceNames())
        {
            var index = bamReader.GetReferenceIndex(name);
            refIdMapping.Add(index, name);
        }
    }

    var created = 0;
    while (created < numThreads)
    {
        var stitcher = new BasicStitcher(
            _options.MinBaseCallQuality,
            useSoftclippedBases: _options.UseSoftClippedBases,
            nifyDisagreements: _options.NifyDisagreements,
            debug: _options.Debug,
            nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
            ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
            maxReadLength: _options.MaxReadLength,
            ignoreReadsAboveMaxLength: _options.IgnoreReadsAboveMaxLength,
            minMapQuality: _options.FilterMinMapQuality,
            dontStitchHomopolymerBridge: _options.DontStitchHomopolymerBridge);

        handlers.Add(new PairHandler(refIdMapping, stitcher, readStatuses, _options.FilterUnstitchablePairs, true));
        ++created;
    }

    return handlers;
}
/// <summary>
/// Creates a multithreaded writer for <c>_outBam</c> that reuses the references of
/// <c>_inBam</c> and a header derived from it via <c>UpdateBamHeader</c>.
/// </summary>
/// <returns>
/// A <c>BamWriterInMem</c> when <c>_options.SortMemoryGB</c> is positive; otherwise a
/// <c>BamWriterMultithreaded</c>.
/// </returns>
private IBamWriterMultithreaded CreateBamWriter()
{
    string bamHeader;
    List<GenomeMetadata.SequenceMetadata> bamReferences;

    // The former index-to-name dictionary was built here but never read, so it is gone.
    using (var reader = new BamReader(_inBam))
    {
        bamReferences = reader.GetReferences();
        bamHeader = UpdateBamHeader(reader.GetHeader());
    }

    if (_options.SortMemoryGB <= 0)
    {
        return new BamWriterMultithreaded(_outBam, bamHeader, bamReferences, _options.NumThreads, 1);
    }

    return new BamWriterInMem(_outBam, bamHeader, bamReferences, _options.SortMemoryGB, _options.NumThreads, 1);
}
/// <summary>
/// Step 2: Get the ref and variant allele frequencies for the variants of interest, in the tumor bam file.
/// Walks the reads on <c>this.Chromosome</c>, skips reads that should not be counted, and
/// calls <c>ProcessReadBases</c> for each read that may overlap the next variant.
/// </summary>
/// <param name="bamPath">Path of the BAM file to scan.</param>
/// <exception cref="ArgumentException">The chromosome name is not found in the BAM file.</exception>
protected void ProcessBamFile(string bamPath)
{
    Console.WriteLine("{0} Looping over bam records from {1}", DateTime.Now, bamPath);

    int overallCount = 0;
    int nextVariantIndex = 0;

    using (var reader = new BamReader(bamPath))
    {
        var read = new BamAlignment();

        int refID = reader.GetReferenceIndex(this.Chromosome);
        if (refID < 0)
        {
            throw new ArgumentException(string.Format("Error: Chromosome name '{0}' does not match bam file at '{1}'", this.Chromosome, bamPath));
        }

        Console.WriteLine("Jump to refid {0} {1}", refID, this.Chromosome);
        reader.Jump(refID, 0);

        while (reader.GetNextAlignment(ref read, false))
        {
            if (!read.HasPosition() || read.RefID > refID)
            {
                break; // We're past our chromosome of interest.
            }

            if (read.RefID < refID)
            {
                continue; // We're not yet on our chromosome of interest.
            }

            overallCount++;
            if (overallCount % 1000000 == 0)
            {
                Console.WriteLine("Record {0} at {1}...", overallCount, read.Position);
            }

            // Skip over unaligned or other non-count-worthy reads
            // (checks run in the same order as before, stopping at the first hit):
            var skipRead = !read.IsPrimaryAlignment()
                           || !read.IsMapped()
                           || read.IsDuplicate()
                           || read.MapQuality <= MinimumMapQ;
            if (skipRead)
            {
                continue;
            }

            // Scan forward through the variants list, to keep up with our reads:
            for (; nextVariantIndex < this.Variants.Count; nextVariantIndex++)
            {
                if (!(this.Variants[nextVariantIndex].ReferencePosition < read.Position))
                {
                    break;
                }
            }

            if (nextVariantIndex >= this.Variants.Count)
            {
                break; // No variants left to count.
            }

            // If the read doesn't look like it has a reasonable chance of touching the next variant, continue:
            if (read.Position + 1000 < this.Variants[nextVariantIndex].ReferencePosition)
            {
                continue;
            }

            // This read potentially overlaps next variant (and further variants). Count bases!
            ProcessReadBases(read, nextVariantIndex);
        }
    }

    Console.WriteLine("Looped over {0} bam records in all", overallCount);
}