/// <summary>
/// Verifies stitched-BAM detection: the <c>SourceIsStitched</c> flag of an extractor opened on
/// "unaligned.bam", and <c>CheckBamHeaderIfBamHasBeenStitched</c> on malformed and realistic headers.
/// </summary>
public void TestIfBamIsStitched()
        {
            // test some generic bam
            var extractor = new BamFileAlignmentExtractor(Path.Combine(TestPaths.LocalTestDataDirectory, "unaligned.bam"));

            try
            {
                Assert.False(extractor.SourceIsStitched);
            }
            finally
            {
                // release the extractor even when the assertion above fails
                extractor.Dispose();
            }

            // the header check must be robust to crazy bams: empty, repeated tags, junk and null
            Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(""));
            Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched("@PG @PG"));
            Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched("blah"));
            Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(null));

            // test some real normal headers
            Assert.True(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(GetPiscesStitchedHeader()));
            Assert.False(BamFileAlignmentExtractor.CheckBamHeaderIfBamHasBeenStitched(GetRegularHeader()));
        }
        /// <summary>
        /// Reads "Ins-L3-var12_S12.bam" restricted to two chr13 intervals (one inside coverage, one with
        /// no data) and checks that only a single read ends before the first interval and that no read
        /// starts after it.
        /// </summary>
        public void IntervalJumping_Ends()
        {
            // single-base interval in the middle of coverage; also the threshold used by the checks below
            const int coveredPosition = 28608100;

            var smallBam     = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
            var chrIntervals = new List<Region>
            {
                new Region(coveredPosition, coveredPosition),  // interval in the middle of coverage
                new Region(29608700, 29608800)                 // interval out in the boonies where there's no data
            };
            var intervals = new Dictionary<string, List<Region>>();

            intervals.Add("chr13", chrIntervals);

            var numReadsLessThan    = 0;
            var numReadsGreaterThan = 0;

            var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
            try
            {
                var read = new Read();

                // verify we are always moving forward and not backwards (not re-reading alignments)
                while (extractor.GetNextAlignment(read))
                {
                    if (read.EndPosition + 1 < coveredPosition) // bam reader is off by one, see note in BamFileExtractor.Jump
                    {
                        numReadsLessThan++;
                    }
                    else if (read.Position > coveredPosition)
                    {
                        numReadsGreaterThan++;
                    }
                }
            }
            finally
            {
                // release the extractor even if reading throws
                extractor.Dispose();
            }

            Assert.Equal(1, numReadsLessThan);  // this should be just the first read (before we figure out we're not in range)
            Assert.Equal(0, numReadsGreaterThan);
        }
        /// <summary>
        /// Checks the reference order exposed by <c>SourceReferenceList</c> for "Ins-L3-var12_S12.bam"
        /// and the result of <c>SequenceOrderingIsNotConsistent</c> for consistent, inconsistent,
        /// partially overlapping, empty and null chromosome lists.
        /// </summary>
        public void SanityCheckSequenceOrdering()
        {
            var smallBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Ins-L3-var12_S12.bam");

            // I dont know why it starts with 10, thats just how it is in the bam. thats what makes it a good test case.
            var expectedSQorder = new List<string> { "chr10", "chr11", "chr12", "chr13" };

            var extractor = new BamFileAlignmentExtractor(smallBam);
            try
            {
                List<string> sequencesInTheBamOrder = extractor.SourceReferenceList;

                Assert.Equal(25, sequencesInTheBamOrder.Count);

                // every expected leading entry is checked, including index 2
                for (var i = 0; i < expectedSQorder.Count; i++)
                {
                    Assert.Equal(expectedSQorder[i], sequencesInTheBamOrder[i]);
                }

                // happyPath
                Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1", "chr2" }));
                Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1", "chr3", "chr4" }));
                Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr14", "chr9" })); // only b/c the bam header is silly.

                // not OK
                Assert.True(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr2", "chr1" }));
                Assert.True(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr9", "chr14" }));
                Assert.True(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr22", "chr21" }));

                // genome has chr not in bam, be ok with it
                Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1", "chrMotherGoose" }));

                // bam has chr not in genome, be ok with it
                Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string> { "chr1" }));

                // empty lists
                Assert.False(extractor.SequenceOrderingIsNotConsistent(new List<string>()));
                Assert.False(extractor.SequenceOrderingIsNotConsistent(null));
            }
            finally
            {
                // release the extractor even when an assertion fails
                extractor.Dispose();
            }
        }
Exemple #4
0
        /// <summary>
        /// Builds the alignment source for one chromosome of a BAM file. A mate finder and a stitcher
        /// are created only when <c>_options.StitchReads</c> is set; otherwise both are passed as null.
        /// </summary>
        /// <param name="chrReference">Reference whose <c>Name</c> is handed to the extractor.</param>
        /// <param name="bamFilePath">Path of the BAM file to read.</param>
        /// <returns>An <c>AlignmentSource</c> wrapping the extractor, mate finder, stitcher and filter settings.</returns>
        protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath)
        {
            var extractor = new BamFileAlignmentExtractor(bamFilePath, _options.StitchReads, chrReference.Name);

            AlignmentMateFinder pairFinder = null;
            if (_options.StitchReads)
            {
                pairFinder = new AlignmentMateFinder(Constants.MaxFragmentSize); // jg todo - do we want to expose this to command line?
            }

            var readStitcher = _options.StitchReads ? CreateStitcher() : null;

            // filter settings are copied straight from the options
            var sourceConfig = new AlignmentSourceConfig();
            sourceConfig.MinimumMapQuality  = _options.MinimumMapQuality;
            sourceConfig.OnlyUseProperPairs = _options.OnlyUseProperPairs;

            return new AlignmentSource(extractor, pairFinder, readStitcher, sourceConfig);
        }
        /// <summary>
        /// Reads every alignment from "unaligned.bam", checking the total read count and that the last
        /// read returned has no chromosome.
        /// </summary>
        public void UnalignedReads()
        {
            var extractor = new BamFileAlignmentExtractor(Path.Combine(TestPaths.LocalTestDataDirectory, "unaligned.bam"));

            try
            {
                var read  = new Read();
                var count = 0;

                while (extractor.GetNextAlignment(read))
                {
                    count++;
                }

                Assert.Equal(138826, count);
                Assert.Null(read.Chromosome); // last reads are unaligned
            }
            finally
            {
                // release the extractor even when an assertion fails
                extractor.Dispose();
            }
        }
        /// <summary>
        /// Opens "small.bam" twice, once declared as not stitched and once as stitched, and checks that
        /// <c>SourceIsStitched</c> reflects the constructor argument after the first read.
        /// </summary>
        public void ReadFileAsStitched()
        {
            var smallBam = Path.Combine(TestPaths.LocalTestDataDirectory, "small.bam");

            // we claim its not stitched
            var unstitchedExtractor = new BamFileAlignmentExtractor(smallBam, false);
            try
            {
                unstitchedExtractor.GetNextAlignment(new Read());
                Assert.False(unstitchedExtractor.SourceIsStitched);
            }
            finally
            {
                // dispose before the same file is opened a second time
                unstitchedExtractor.Dispose();
            }

            // we claim its stitched
            var stitchedExtractor = new BamFileAlignmentExtractor(smallBam, true);
            try
            {
                stitchedExtractor.GetNextAlignment(new Read());
                Assert.True(stitchedExtractor.SourceIsStitched);
            }
            finally
            {
                stitchedExtractor.Dispose();
            }
        }
        /// <summary>
        /// Reads "Ins-L3-var12_S12.bam" with a single chr13 interval at position 115169880. There is no
        /// explicit assertion: the test passes when draining the extractor does not throw.
        /// </summary>
        public void IntervalJumping_Boundaries()
        {
            var smallBam     = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
            var chrIntervals = new List<Region>
            {
                new Region(115169880, 115169880)  // feeding in an interval that's past reference max shouldnt cause it to blow up
            };
            var intervals = new Dictionary<string, List<Region>>();

            intervals.Add("chr13", chrIntervals);

            var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
            try
            {
                var read = new Read();

                while (extractor.GetNextAlignment(read))
                {
                    // intentionally empty: only draining the reader to prove it does not throw
                }
            }
            finally
            {
                // release the extractor even if reading throws
                extractor.Dispose();
            }
        }
        /// <summary>
        /// Reads "Ins-L3-var12_S12.bam" restricted to two single-base chr13 intervals and checks that
        /// every returned read either starts before 28607840 or ends at or after 28608631.
        /// </summary>
        public void IntervalJumping_Middle()
        {
            var smallBam     = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
            var chrIntervals = new List<Region>
            {
                new Region(28607838, 28607838),
                new Region(28608631, 28608631)
            };
            var intervals = new Dictionary<string, List<Region>>();

            intervals.Add("chr13", chrIntervals);

            var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
            try
            {
                var read = new Read();

                // verify we skip over the middle
                while (extractor.GetNextAlignment(read))
                {
                    Assert.True(read.Position < 28607840 || read.BamAlignment.GetEndPosition() >= 28608631);
                }
            }
            finally
            {
                // release the extractor even when an assertion fails
                extractor.Dispose();
            }
        }
Exemple #9
0
        /// <summary>
        /// Builds the alignment source for one chromosome of a BAM file. The extractor is restricted to
        /// the BAM's intervals only when <c>_bamIntervalLookup</c> has an entry for the file and
        /// <c>_options.SkipNonIntervalAlignments</c> is set. A warning is logged when the extractor
        /// reports that its sequence ordering is not consistent with <paramref name="chrsToProcess"/>.
        /// </summary>
        /// <param name="chrReference">Reference whose <c>Name</c> is handed to the extractor.</param>
        /// <param name="bamFilePath">Path of the BAM file to read; also the key into <c>_bamIntervalLookup</c>.</param>
        /// <param name="chrsToProcess">Chromosome names passed to the ordering check; may be null.</param>
        /// <returns>An <c>AlignmentSource</c> built with a null mate finder and the filter settings.</returns>
        protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath, List<string> chrsToProcess = null)
        {
            // no mate finding in this variant; the typed null keeps the constructor call unambiguous
            AlignmentMateFinder noMateFinder = null;

            var chrName             = chrReference.Name;
            var restrictToIntervals = _bamIntervalLookup.ContainsKey(bamFilePath) && _options.SkipNonIntervalAlignments;
            var bamIntervals        = restrictToIntervals ? _bamIntervalLookup[bamFilePath] : null;
            var extractor           = new BamFileAlignmentExtractor(bamFilePath, chrName, bamIntervals);

            //Warn if the bam has sequences ordered differently to the reference genome.
            //That would confuse us because we will not know how the user wants to order the output gvcf.
            var orderingMismatch = extractor.SequenceOrderingIsNotConsistent(chrsToProcess);
            if (orderingMismatch)
            {
                Logger.WriteToLog("Warning:  Reference sequences in the bam do not match the order of the reference sequences in the genome. Check bam " + bamFilePath);
                Logger.WriteToLog("Variants will be ordered according to the reference genome");
            }

            var sourceConfig = new AlignmentSourceConfig();
            sourceConfig.MinimumMapQuality  = _options.MinimumMapQuality;
            sourceConfig.OnlyUseProperPairs = _options.OnlyUseProperPairs;

            return new AlignmentSource(extractor, noMateFinder, sourceConfig);
        }
Exemple #10
0
        /// <summary>
        /// Builds the alignment source for one chromosome of a BAM file, passing the caller's stitched
        /// flag to the extractor. A warning is logged when the extractor reports that its sequence
        /// ordering is not consistent with <paramref name="chrsToProcess"/>.
        /// </summary>
        /// <param name="chrReference">Reference whose <c>Name</c> is handed to the extractor.</param>
        /// <param name="bamFilePath">Path of the BAM file to read.</param>
        /// <param name="commandLineSaysStitched">Stitched flag forwarded to the extractor constructor.</param>
        /// <param name="chrsToProcess">Chromosome names passed to the ordering check; may be null.</param>
        /// <returns>An <c>AlignmentSource</c> built with a null mate finder and the BAM filter settings.</returns>
        protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath, bool commandLineSaysStitched, List<string> chrsToProcess = null)
        {
            // no mate finding in this variant; the typed null keeps the constructor call unambiguous
            AlignmentMateFinder noMateFinder = null;

            var extractor = new BamFileAlignmentExtractor(bamFilePath, commandLineSaysStitched, chrReference.Name);

            //Warn if the bam has sequences ordered differently to the reference genome.
            //That would confuse us because we will not know how the user wants to order the output gvcf.
            var orderingMismatch = extractor.SequenceOrderingIsNotConsistent(chrsToProcess);
            if (orderingMismatch)
            {
                Logger.WriteToLog("Warning:  Reference sequences in the bam do not match the order of the reference sequences in the genome. Check bam " + bamFilePath);
                Logger.WriteToLog("Variants will be ordered according to the reference genome");
            }

            // filter settings come from the BamFilterParameters section of the options
            var sourceConfig = new AlignmentSourceConfig();
            sourceConfig.MinimumMapQuality  = _options.BamFilterParameters.MinimumMapQuality;
            sourceConfig.OnlyUseProperPairs = _options.BamFilterParameters.OnlyUseProperPairs;
            sourceConfig.SkipDuplicates     = _options.BamFilterParameters.RemoveDuplicates;

            return new AlignmentSource(extractor, noMateFinder, sourceConfig);
        }
        /// <summary>
        /// Reads every alignment from <paramref name="bamfile"/>, checking that positions never decrease,
        /// that each read has a name and chromosome, that the read count matches, and that reading after
        /// <c>Dispose</c> throws <see cref="IOException"/>.
        /// </summary>
        /// <param name="bamfile">Path of the BAM file to read.</param>
        /// <param name="expectedReads">Number of alignments the file is expected to yield.</param>
        /// <param name="bamHasXc">
        /// When true, at least one read must carry a non-empty <c>StitchedCigar</c>; when false, every
        /// read's <c>StitchedCigar</c> must be null.
        /// </param>
        private void ReadFileTest(string bamfile, int expectedReads, bool bamHasXc)
        {
            var extractor = new BamFileAlignmentExtractor(bamfile);
            var read      = new Read();

            try
            {
                var lastPosition         = -1;
                var numReads             = 0;
                var hasAnyStitchedCigars = false;

                while (extractor.GetNextAlignment(read))
                {
                    Assert.True(read.Position >= lastPosition); // make sure reads are read in order
                    Assert.False(string.IsNullOrEmpty(read.Name));
                    Assert.False(string.IsNullOrEmpty(read.Chromosome));

                    if (!bamHasXc)
                    {
                        Assert.Null(read.StitchedCigar);
                    }
                    if (read.StitchedCigar != null && read.StitchedCigar.Count > 0)
                    {
                        hasAnyStitchedCigars = true;
                    }
                    lastPosition = read.Position;
                    numReads++;
                }

                if (bamHasXc)
                {
                    Assert.True(hasAnyStitchedCigars);
                }
                Assert.Equal(expectedReads, numReads);
            }
            finally
            {
                // dispose on every path so a failed assertion does not leave the extractor open
                extractor.Dispose();
            }

            // make sure can't read after dispose
            Assert.Throws<IOException>(() => extractor.GetNextAlignment(read));
        }
        /// <summary>
        /// Reads "Ins-L3-var12_S12.bam" restricted to three single-base chr13 intervals and checks that
        /// read positions never decrease across the whole traversal.
        /// </summary>
        public void IntervalJumping_SmallIntervals()
        {
            var smallBam     = Path.Combine(UnitTestPaths.TestDataDirectory, "Ins-L3-var12_S12.bam");
            var chrIntervals = new List<Region>
            {
                new Region(28607838, 28607838),
                new Region(28607908, 28607908),
                new Region(28608631, 28608631)
            };
            var intervals = new Dictionary<string, List<Region>>();

            intervals.Add("chr13", chrIntervals);

            var extractor = new BamFileAlignmentExtractor(smallBam, bamIntervals: intervals);
            try
            {
                var read         = new Read();
                var lastPosition = 0;

                // verify we are always moving forward and not backwards (not re-reading alignments)
                while (extractor.GetNextAlignment(read))
                {
                    Assert.True(read.Position >= lastPosition);
                    lastPosition = read.Position;
                }
            }
            finally
            {
                // release the extractor even when an assertion fails
                extractor.Dispose();
            }
        }