public void SanityCheckSequenceOrdering()
        {
            // Checks two things against a small test bam:
            //   1. SourceReferenceList exposes the bam's reference sequences in the order the bam lists them.
            //   2. SequenceOrderingIsNotConsistent only returns true for chromosome lists whose relative
            //      order contradicts the bam's order.
            var smallBam  = Path.Combine(TestPaths.LocalTestDataDirectory, "Ins-L3-var12_S12.bam");
            var extractor = new BamFileAlignmentExtractor(smallBam);

            //I dont know why it starts with 10, thats just how it is in the bam. thats what makes it a good test case.
            var expectedSQorder = new List <string> {
                "chr10", "chr11", "chr12", "chr13"
            };

            List <string> sequencesInTheBamOrder = extractor.SourceReferenceList;

            // NOTE(review): expectedSQorder[2] ("chr12") has never been asserted by this test;
            // confirm against the bam header before adding that check.
            Assert.Equal(expectedSQorder[0], sequencesInTheBamOrder[0]);
            Assert.Equal(expectedSQorder[1], sequencesInTheBamOrder[1]);
            Assert.Equal(expectedSQorder[3], sequencesInTheBamOrder[3]);
            Assert.Equal(25, sequencesInTheBamOrder.Count);

            //happyPath
            Assert.False(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr1", "chr2"
            }));
            Assert.False(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr1", "chr3", "chr4"
            }));
            Assert.False(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr14", "chr9"
            }));                                                                                           //only b/c the bam header is silly.

            //not OK
            Assert.True(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr2", "chr1"
            }));
            Assert.True(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr9", "chr14"
            }));
            Assert.True(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr22", "chr21"
            }));

            //genome has chr not in bam, be ok with it
            Assert.False(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr1", "chrMotherGoose"
            }));

            //bam has chr not in genome, be ok with it
            Assert.False(extractor.SequenceOrderingIsNotConsistent(new List <string> {
                "chr1"
            }));

            //empty and null lists are treated as consistent
            Assert.False(extractor.SequenceOrderingIsNotConsistent(new List <string>()));
            Assert.False(extractor.SequenceOrderingIsNotConsistent(null));
        }
Esempio n. 2
0
        /// <summary>
        /// Builds an <see cref="IAlignmentSource"/> that reads alignments for one chromosome from a bam file.
        /// </summary>
        /// <param name="chrReference">Chromosome reference; its <c>Name</c> is handed to the extractor.</param>
        /// <param name="bamFilePath">Path of the bam file to read.</param>
        /// <param name="chrsToProcess">
        /// Chromosome names passed to the extractor's sequence-ordering check; may be null.
        /// </param>
        /// <returns>An <c>AlignmentSource</c> wrapping the extractor, with no mate finder.</returns>
        protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath, List <string> chrsToProcess = null)
        {
            // Intervals are only forwarded when this bam has an entry in the lookup AND the
            // options ask to skip alignments outside the intervals; otherwise null is passed.
            bool restrictToIntervals = _bamIntervalLookup.ContainsKey(bamFilePath) && _options.SkipNonIntervalAlignments;
            var  bamIntervals        = restrictToIntervals ? _bamIntervalLookup[bamFilePath] : null;

            var extractor = new BamFileAlignmentExtractor(bamFilePath, chrReference.Name, bamIntervals);

            // A bam whose sequences are ordered differently from the reference genome is only
            // warned about: the output is ordered by the reference genome regardless.
            bool orderingMismatch = extractor.SequenceOrderingIsNotConsistent(chrsToProcess);
            if (orderingMismatch)
            {
                string bamWarning = "Warning:  Reference sequences in the bam do not match the order of the reference sequences in the genome. Check bam " + bamFilePath;
                Logger.WriteToLog(bamWarning);
                Logger.WriteToLog("Variants will be ordered according to the reference genome");
            }

            var sourceConfig = new AlignmentSourceConfig
            {
                MinimumMapQuality  = _options.MinimumMapQuality,
                OnlyUseProperPairs = _options.OnlyUseProperPairs
            };

            // Kept as a typed null so the same AlignmentSource constructor overload is selected.
            AlignmentMateFinder noMateFinder = null;

            return new AlignmentSource(extractor, noMateFinder, sourceConfig);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds an <see cref="IAlignmentSource"/> that reads alignments for one chromosome from a bam file.
        /// </summary>
        /// <param name="chrReference">Chromosome reference; its <c>Name</c> is handed to the extractor.</param>
        /// <param name="bamFilePath">Path of the bam file to read.</param>
        /// <param name="commandLineSaysStitched">Flag forwarded unchanged to the extractor's constructor.</param>
        /// <param name="chrsToProcess">
        /// Chromosome names passed to the extractor's sequence-ordering check; may be null.
        /// </param>
        /// <returns>An <c>AlignmentSource</c> wrapping the extractor, with no mate finder.</returns>
        protected virtual IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath, bool commandLineSaysStitched, List <string> chrsToProcess = null)
        {
            var extractor = new BamFileAlignmentExtractor(bamFilePath, commandLineSaysStitched, chrReference.Name);

            // A bam whose sequences are ordered differently from the reference genome is only
            // warned about: the output is ordered by the reference genome regardless.
            bool orderingMismatch = extractor.SequenceOrderingIsNotConsistent(chrsToProcess);
            if (orderingMismatch)
            {
                string bamWarning = "Warning:  Reference sequences in the bam do not match the order of the reference sequences in the genome. Check bam " + bamFilePath;
                Logger.WriteToLog(bamWarning);
                Logger.WriteToLog("Variants will be ordered according to the reference genome");
            }

            // Read-filter settings are copied from the bam filter options.
            var sourceConfig = new AlignmentSourceConfig
            {
                MinimumMapQuality  = _options.BamFilterParameters.MinimumMapQuality,
                OnlyUseProperPairs = _options.BamFilterParameters.OnlyUseProperPairs,
                SkipDuplicates     = _options.BamFilterParameters.RemoveDuplicates
            };

            // Kept as a typed null so the same AlignmentSource constructor overload is selected.
            AlignmentMateFinder noMateFinder = null;

            return new AlignmentSource(extractor, noMateFinder, sourceConfig);
        }