/// <summary>
/// Verifies stitched-bam detection: first through <c>SourceIsStitched</c> on an extractor opened over
/// "unaligned.bam", then by calling <c>CheckBamHeaderIfBamHasBeenStitched</c> directly on header text.
/// </summary>
public void TestIfBamIsStitched()
{
    // Test some generic bam.
    var extractor = new BamFileAlignmentExtractor(Path.Combine(TestPaths.LocalTestDataDirectory, "unaligned.bam"));
    try
    {
        Assert.False(extractor.SourceIsStitched);
    }
    finally
    {
        // Release the extractor even when the assertion above fails.
        extractor.Dispose();
    }

    // Test to be robust to crazy bams: empty, repeated tag, free text and null headers.
    Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(""));
    Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched("@PG @PG"));
    Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched("blah"));
    Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(null));

    // Test some real normal headers.
    Assert.True(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(GetPiscesStitchedHeader()));
    Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(GetRegularHeader()));
}
/// <summary>
/// Reads "Ins-L3-var12_S12.bam" restricted to two chr13 intervals and counts the reads that end before,
/// or start after, the covered position. Expects exactly one read before it and none after it.
/// </summary>
public void IntervalJumping_Ends()
{
    // Single position used both as the first interval and as the boundary for the counters below.
    const int coveredPosition = 28608100;

    var smallBam = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
    var intervals = new Dictionary<string, List<Region>>
    {
        {
            "chr13",
            new List<Region>
            {
                new Region(coveredPosition, coveredPosition), // interval in the middle of coverage
                new Region(29608700, 29608800)                // interval out in the boonies where there's no data
            }
        }
    };

    var numReadsLessThan = 0;
    var numReadsGreaterThan = 0;

    var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
    try
    {
        var read = new Read();

        // Verify we are always moving forward and not backwards (not re-reading alignments).
        while (extractor.GetNextAlignment(read))
        {
            if (read.EndPosition + 1 < coveredPosition) // bam reader is off by one, see note in BamFileExtractor.Jump
            {
                numReadsLessThan++;
            }
            else if (read.Position > coveredPosition)
            {
                numReadsGreaterThan++;
            }
        }
    }
    finally
    {
        // Release the extractor even if reading throws.
        extractor.Dispose();
    }

    Assert.Equal(1, numReadsLessThan); // this should be just the first read (before we figure out we're not in range)
    Assert.Equal(0, numReadsGreaterThan);
}
/// <summary>
/// Checks the reference-sequence order reported by the extractor for "Ins-L3-var12_S12.bam" and exercises
/// <c>SequenceOrderingIsNotConsistent</c> with consistent, inconsistent, partially unknown, empty and
/// null chromosome lists.
/// </summary>
public void SanityCheckSequenceOrdering()
{
    var smallBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Ins-L3-var12_S12.bam");

    // I don't know why it starts with 10, that's just how it is in the bam. That's what makes it a good test case.
    var expectedSQorder = new List<string> { "chr10", "chr11", "chr12", "chr13" };

    var extractor = new BamFileAlignmentExtractor(smallBam);
    try
    {
        List<string> sequencesInTheBamOrder = extractor.SourceReferenceList;

        // NOTE(review): index 2 ("chr12") is not compared here - confirm whether that omission is intentional.
        Assert.Equal(expectedSQorder[0], sequencesInTheBamOrder[0]);
        Assert.Equal(expectedSQorder[1], sequencesInTheBamOrder[1]);
        Assert.Equal(expectedSQorder[3], sequencesInTheBamOrder[3]);
        Assert.Equal(25, sequencesInTheBamOrder.Count);

        // Happy path.
        Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1", "chr2" }));
        Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1", "chr3", "chr4" }));
        Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr14", "chr9" })); // only b/c the bam header is silly.

        // Not OK.
        Assert.True(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr2", "chr1" }));
        Assert.True(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr9", "chr14" }));
        Assert.True(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr22", "chr21" }));

        // Genome has chr not in bam, be ok with it.
        Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1", "chrMotherGoose" }));

        // Bam has chr not in genome, be ok with it.
        Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1" }));

        // Empty lists.
        Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { }));
        Assert.False(extractor.SequenceOrderingIsNotConsistent(null));
    }
    finally
    {
        // Release the extractor even when an assertion fails.
        extractor.Dispose();
    }
}
/// <summary>
/// Creates the alignment source for one chromosome of a bam file. A mate finder and a stitcher are
/// supplied only when <c>_options.StitchReads</c> is set; otherwise both are passed as null.
/// </summary>
/// <param name="chrReference">Chromosome reference whose <c>Name</c> is handed to the extractor.</param>
/// <param name="bamFilePath">Path of the bam file to read alignments from.</param>
/// <returns>A new <c>AlignmentSource</c> wrapping the extractor, optional mate finder, optional stitcher and config.</returns>
protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath)
{
    var extractor = new BamFileAlignmentExtractor(bamFilePath, _options.StitchReads, chrReference.Name);

    AlignmentMateFinder pairFinder = null;
    if (_options.StitchReads)
    {
        // jg todo - do we want to expose this to command line?
        pairFinder = new AlignmentMateFinder(Constants.MaxFragmentSize);
    }

    var readStitcher = _options.StitchReads ? CreateStitcher() : null;

    // Read filtering settings are copied straight from the options.
    var sourceConfig = new AlignmentSourceConfig();
    sourceConfig.MinimumMapQuality = _options.MinimumMapQuality;
    sourceConfig.OnlyUseProperPairs = _options.OnlyUseProperPairs;

    return new AlignmentSource(extractor, pairFinder, readStitcher, sourceConfig);
}
/// <summary>
/// Reads every alignment from "unaligned.bam", expecting 138826 reads in total and a null
/// chromosome on the last read returned.
/// </summary>
public void UnalignedReads()
{
    var extractor = new BamFileAlignmentExtractor(Path.Combine(TestPaths.LocalTestDataDirectory, "unaligned.bam"));
    var read = new Read();
    var count = 0;

    try
    {
        while (extractor.GetNextAlignment(read))
        {
            count++;
        }
    }
    finally
    {
        // Release the extractor even if reading throws.
        extractor.Dispose();
    }

    Assert.Equal(138826, count);
    Assert.Null(read.Chromosome); // last reads are unaligned
}
/// <summary>
/// Opens "small.bam" twice, passing false and then true as the second constructor argument, and checks
/// that <c>SourceIsStitched</c> reports the same value after one alignment has been read.
/// </summary>
public void ReadFileAsStitched()
{
    var smallBam = Path.Combine(TestPaths.LocalTestDataDirectory, "small.bam");

    // We claim it's not stitched.
    var unstitchedExtractor = new BamFileAlignmentExtractor(smallBam, false);
    try
    {
        var read = new Read();
        unstitchedExtractor.GetNextAlignment(read);
        Assert.False(unstitchedExtractor.SourceIsStitched);
    }
    finally
    {
        // Dispose before the same file is opened a second time.
        unstitchedExtractor.Dispose();
    }

    // We claim it's stitched.
    var stitchedExtractor = new BamFileAlignmentExtractor(smallBam, true);
    try
    {
        var read = new Read();
        stitchedExtractor.GetNextAlignment(read);
        Assert.True(stitchedExtractor.SourceIsStitched);
    }
    finally
    {
        stitchedExtractor.Dispose();
    }
}
/// <summary>
/// Reads "Ins-L3-var12_S12.bam" restricted to a single chr13 interval at 115169880. The test has no
/// assertions; it passes when reading to the end does not throw.
/// </summary>
public void IntervalJumping_Boundaries()
{
    var smallBam = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
    var intervals = new Dictionary<string, List<Region>>
    {
        {
            "chr13",
            new List<Region>
            {
                new Region(115169880, 115169880) // feeding in an interval that's past reference max shouldn't cause it to blow up
            }
        }
    };

    var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
    try
    {
        var read = new Read();

        // Drain the extractor; only the absence of an exception matters here.
        while (extractor.GetNextAlignment(read))
        {
        }
    }
    finally
    {
        // Release the extractor even if reading throws.
        extractor.Dispose();
    }
}
/// <summary>
/// Reads "Ins-L3-var12_S12.bam" restricted to two single-position chr13 intervals and asserts that every
/// returned read either starts before 28607840 or ends at or after 28608631.
/// </summary>
public void IntervalJumping_Middle()
{
    var smallBam = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
    var intervals = new Dictionary<string, List<Region>>
    {
        {
            "chr13",
            new List<Region>
            {
                new Region(28607838, 28607838),
                new Region(28608631, 28608631)
            }
        }
    };

    var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
    try
    {
        var read = new Read();

        // Verify we skip over the middle.
        while (extractor.GetNextAlignment(read))
        {
            Assert.True(read.Position < 28607840 || read.BamAlignment.GetEndPosition() >= 28608631);
        }
    }
    finally
    {
        // Release the extractor even when the in-loop assertion fails.
        extractor.Dispose();
    }
}
/// <summary>
/// Creates the alignment source for one chromosome of a bam file. Intervals registered for the bam in
/// <c>_bamIntervalLookup</c> are passed to the extractor only when <c>_options.SkipNonIntervalAlignments</c>
/// is set; otherwise null is passed. Logs a warning when the extractor reports that the bam's sequence
/// ordering is not consistent with <paramref name="chrsToProcess"/>.
/// </summary>
/// <param name="chrReference">Chromosome reference whose <c>Name</c> is handed to the extractor.</param>
/// <param name="bamFilePath">Path of the bam file; also the key into <c>_bamIntervalLookup</c>.</param>
/// <param name="chrsToProcess">Chromosome names forwarded to the ordering check; may be null.</param>
/// <returns>A new <c>AlignmentSource</c> wrapping the extractor, a null mate finder and the config.</returns>
protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath, List<string> chrsToProcess = null)
{
    // No mate finder is used here; the typed null keeps the constructor call unambiguous.
    AlignmentMateFinder noMateFinder = null;

    // Evaluated in the same order as the original constructor arguments.
    var chromosomeName = chrReference.Name;
    var restrictToIntervals = _bamIntervalLookup.ContainsKey(bamFilePath) && _options.SkipNonIntervalAlignments;
    var intervalsForBam = restrictToIntervals ? _bamIntervalLookup[bamFilePath] : null;

    var extractor = new BamFileAlignmentExtractor(bamFilePath, chromosomeName, intervalsForBam);

    // Warn if the bam has sequences ordered differently to the reference genome.
    // That would confuse us because we will not know how the user wants to order the output gvcf.
    var orderingMismatch = extractor.SequenceOrderingIsNotConsistent(chrsToProcess);
    if (orderingMismatch)
    {
        Logger.WriteToLog("Warning: Reference sequences in the bam do not match the order of the reference sequences in the genome. Check bam " + bamFilePath);
        Logger.WriteToLog("Variants will be ordered according to the reference genome");
    }

    // Read filtering settings are copied straight from the options.
    var sourceConfig = new AlignmentSourceConfig();
    sourceConfig.MinimumMapQuality = _options.MinimumMapQuality;
    sourceConfig.OnlyUseProperPairs = _options.OnlyUseProperPairs;

    return new AlignmentSource(extractor, noMateFinder, sourceConfig);
}
/// <summary>
/// Creates the alignment source for one chromosome of a bam file, forwarding the caller's stitched flag
/// to the extractor. Logs a warning when the extractor reports that the bam's sequence ordering is not
/// consistent with <paramref name="chrsToProcess"/>.
/// </summary>
/// <param name="chrReference">Chromosome reference whose <c>Name</c> is handed to the extractor.</param>
/// <param name="bamFilePath">Path of the bam file to read alignments from.</param>
/// <param name="commandLineSaysStitched">Stitched flag passed as the extractor's second constructor argument.</param>
/// <param name="chrsToProcess">Chromosome names forwarded to the ordering check; may be null.</param>
/// <returns>A new <c>AlignmentSource</c> wrapping the extractor, a null mate finder and the config.</returns>
protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath, bool commandLineSaysStitched, List<string> chrsToProcess = null)
{
    // No mate finder is used here; the typed null keeps the constructor call unambiguous.
    AlignmentMateFinder noMateFinder = null;

    var extractor = new BamFileAlignmentExtractor(bamFilePath, commandLineSaysStitched, chrReference.Name);

    // Warn if the bam has sequences ordered differently to the reference genome.
    // That would confuse us because we will not know how the user wants to order the output gvcf.
    var orderingMismatch = extractor.SequenceOrderingIsNotConsistent(chrsToProcess);
    if (orderingMismatch)
    {
        Logger.WriteToLog("Warning: Reference sequences in the bam do not match the order of the reference sequences in the genome. Check bam " + bamFilePath);
        Logger.WriteToLog("Variants will be ordered according to the reference genome");
    }

    // Read filtering settings come from the options' bam filter parameters.
    var sourceConfig = new AlignmentSourceConfig();
    sourceConfig.MinimumMapQuality = _options.BamFilterParameters.MinimumMapQuality;
    sourceConfig.OnlyUseProperPairs = _options.BamFilterParameters.OnlyUseProperPairs;
    sourceConfig.SkipDuplicates = _options.BamFilterParameters.RemoveDuplicates;

    return new AlignmentSource(extractor, noMateFinder, sourceConfig);
}
/// <summary>
/// Reads every alignment from <paramref name="bamfile"/> and checks that positions never decrease, that
/// each read has a name and a chromosome, that stitched cigars appear only when expected, and that the
/// total read count matches. Finally verifies that reading after dispose throws <c>IOException</c>.
/// </summary>
/// <param name="bamfile">Path of the bam file to read.</param>
/// <param name="expectedReads">Number of reads the extractor is expected to return.</param>
/// <param name="bamHasXc">
/// When false, every read must have a null <c>StitchedCigar</c>; when true, at least one read must have
/// a non-empty one.
/// </param>
private void ReadFileTest(string bamfile, int expectedReads, bool bamHasXc)
{
    var extractor = new BamFileAlignmentExtractor(bamfile);
    var read = new Read();

    try
    {
        var lastPosition = -1;
        var numReads = 0;
        bool hasAnyStitchedCigars = false;

        while (extractor.GetNextAlignment(read))
        {
            Assert.True(read.Position >= lastPosition); // make sure reads are read in order
            Assert.False(string.IsNullOrEmpty(read.Name));
            Assert.False(string.IsNullOrEmpty(read.Chromosome));

            if (!bamHasXc)
            {
                Assert.Null(read.StitchedCigar);
            }

            if (read.StitchedCigar != null && read.StitchedCigar.Count > 0)
            {
                hasAnyStitchedCigars = true;
            }

            lastPosition = read.Position;
            numReads++;
        }

        if (bamHasXc)
        {
            Assert.True(hasAnyStitchedCigars);
        }

        Assert.Equal(expectedReads, numReads);
    }
    finally
    {
        // Dispose exactly once, whether or not the assertions above passed.
        extractor.Dispose();
    }

    // Make sure can't read after dispose.
    Assert.Throws<IOException>(() => extractor.GetNextAlignment(read));
}
/// <summary>
/// Reads "Ins-L3-var12_S12.bam" restricted to three single-position chr13 intervals and asserts that
/// read positions never decrease from one returned read to the next.
/// </summary>
public void IntervalJumping_SmallIntervals()
{
    var smallBam = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
    var intervals = new Dictionary<string, List<Region>>
    {
        {
            "chr13",
            new List<Region>
            {
                new Region(28607838, 28607838),
                new Region(28607908, 28607908),
                new Region(28608631, 28608631)
            }
        }
    };

    var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
    try
    {
        var read = new Read();
        var lastPosition = 0;

        // Verify we are always moving forward and not backwards (not re-reading alignments).
        while (extractor.GetNextAlignment(read))
        {
            Assert.True(read.Position >= lastPosition);
            lastPosition = read.Position;
        }
    }
    finally
    {
        // Release the extractor even when the in-loop assertion fails.
        extractor.Dispose();
    }
}