/// <summary>
        /// Test helper that wires up a concrete realignment pipeline and runs it over one read pair.
        /// The reference is a "chr1" snippet starting at position 0, made of <paramref name="refSeqOffset"/>
        /// 'A' bases, then <paramref name="refSeq"/>, then 1000 'T' bases. The snippet source, status handler
        /// and region filterer are Moq mocks; the filterer answers true to every AnyIndelsNearby query.
        /// </summary>
        /// <param name="pair">Read pair handed to <c>ReadPairRealignerAndCombiner.ExtractReads</c>.</param>
        /// <param name="refSeq">Reference bases placed after the leading 'A' padding.</param>
        /// <param name="refSeqOffset">Number of 'A' bases placed before <paramref name="refSeq"/>.</param>
        /// <param name="preIndels">Candidate indels, converted through <c>HashableIndelSource.GetHashableIndel</c>.</param>
        /// <param name="hasExistingIndels">Forwarded unchanged to the combiner's constructor.</param>
        /// <returns>Whatever <c>ExtractReads</c> returns for the pair.</returns>
        private static List<BamAlignment> ExtractReadsFromRealignerAndCombiner(PairResult pair, string refSeq,
                                                                               int refSeqOffset, List<PreIndel> preIndels, bool hasExistingIndels = false)
        {
            const string chromosome = "chr1";

            // Reference snippet plus a snippet source that returns it for any requested position.
            var reference = new GenomeSnippet
            {
                Chromosome    = chromosome,
                Sequence      = new string('A', refSeqOffset) + refSeq + new string('T', 1000),
                StartPosition = 0
            };
            var genomeSource = new Mock<IGenomeSnippetSource>();
            genomeSource.Setup(x => x.GetGenomeSnippet(It.IsAny<int>())).Returns(reference);

            var statusHandler     = new Mock<IStatusHandler>();
            var alignmentComparer = new GemBasicAlignmentComparer(false, false);

            // TODO (carried over from the original author): something in this realigner setup was
            // flagged as needing a fix, but the original note did not say what.
            var realigner = new GeminiReadRealigner(
                alignmentComparer,
                remaskSoftclips: false,
                keepProbeSoftclips: false,
                keepBothSideSoftclips: false,
                trackActualMismatches: false,
                checkSoftclipsForMismatches: true,
                debug: false,
                maskNsOnly: false,
                maskPartialInsertion: false,
                minimumUnanchoredInsertionLength: 1,
                minInsertionSizeToAllowMismatchingBases: 4,
                maxProportionInsertSequenceMismatch: 0.2);

            // Region filterer that always reports indels nearby.
            var regionFilterer = new Mock<IRegionFilterer>();
            regionFilterer.Setup(x => x.AnyIndelsNearby(It.IsAny<int>())).Returns(true);

            // Convert the candidate indels against the reference snippet.
            var candidateIndels = (from preIndel in preIndels
                                   select HashableIndelSource.GetHashableIndel(reference, preIndel, 0, false)).ToList();
            var indelSource = new ChromosomeIndelSource(candidateIndels, genomeSource.Object);

            var judger    = new RealignmentJudger(alignmentComparer);
            var evaluator = new RealignmentEvaluator(indelSource, statusHandler.Object, realigner, judger,
                                                     chromosome, false, true, true, true, regionFilterer.Object, false);

            // Stitching is enabled for the post-realignment stitcher.
            var pairHandler = new PairHandler(new Dictionary<int, string> { [1] = chromosome },
                                              new BasicStitcher(0), tryStitch: true);
            var stitcher = new PostRealignmentStitcher(pairHandler, new DebugStatusHandler(new ReadStatusCounter()));

            var combiner = new ReadPairRealignerAndCombiner(new NonSnowballEvidenceCollector(), stitcher, evaluator,
                                                            new PairSpecificIndelFinder(), chromosome, false,
                                                            hasExistingIndels: hasExistingIndels);

            return combiner.ExtractReads(pair, new NmCalculator(genomeSource.Object));
        }
        /// <summary>
        /// Exercises <c>RealignmentEvaluator.GetFinalAlignment</c> with a concrete realigner, judger and indel
        /// source (only the genome snippet source and status handler are Moq mocks created here), asserting on
        /// the changed/confirmed out-flags and on the CIGAR string of the returned alignment.
        /// Scenarios, in order: no known indels; one known insertion; re-evaluating an already realigned read;
        /// reads that carry existing indels (sanctioned and unsanctioned); and several scored candidate
        /// insertions at the same position.
        /// </summary>
        public void GetFinalAlignment_NonMock()
        {
            // Reference snippet on chr1 (start position 1000) and a source that always returns it.
            var reference = new GenomeSnippet()
            {
                Chromosome    = "chr1",
                Sequence      = new string('A', 1000) + "ATCGATTGA" + new string('T', 1000),
                StartPosition = 1000
            };
            var genomeSource = new Mock<IGenomeSnippetSource>();
            genomeSource.Setup(x => x.GetGenomeSnippet(It.IsAny<int>())).Returns(reference);

            var statusHandler     = new Mock<IStatusHandler>();
            var alignmentComparer = new GemBasicAlignmentComparer(false, false);

            // TODO (carried over from the original author): something in this realigner setup was
            // flagged as needing a fix, but the original note did not say what.
            var realigner = new GeminiReadRealigner(
                alignmentComparer,
                remaskSoftclips: false,
                keepProbeSoftclips: false,
                keepBothSideSoftclips: false,
                trackActualMismatches: false,
                checkSoftclipsForMismatches: true,
                debug: false,
                maskNsOnly: false,
                maskPartialInsertion: false,
                minimumUnanchoredInsertionLength: 1,
                minInsertionSizeToAllowMismatchingBases: 4,
                maxProportionInsertSequenceMismatch: 0.2);

            var regionFilterer = GetMockRegionFilterer();

            // Every scenario gets a fresh evaluator; only the indel source and one boolean vary.
            // NOTE(review): the parameter name is inferred from the scenario comments in this test
            // ("softclipping unknowns") - confirm against the RealignmentEvaluator constructor.
            RealignmentEvaluator NewEvaluator(ChromosomeIndelSource source, bool softclipUnknownIndels = true)
            {
                return new RealignmentEvaluator(source, statusHandler.Object, realigner,
                                                new RealignmentJudger(alignmentComparer), "chr1", false, true, true,
                                                softclipUnknownIndels, regionFilterer.Object, false);
            }

            // A fresh single-element list describing an insertion already present on the read.
            List<PreIndel> ExistingInsertion(int position, string alternateAllele)
            {
                return new List<PreIndel>()
                {
                    new PreIndel(new CandidateAllele("chr1", position, "A", alternateAllele, AlleleCategory.Insertion))
                };
            }

            bool wasChanged, wasSoftclipped, wasConfirmed, wasSketchy;

            // No indels
            var emptySource = new ChromosomeIndelSource(new List<HashableIndel>(), genomeSource.Object);
            var evaluator   = NewEvaluator(emptySource);

            var ungappedRead =
                TestHelpers.CreateBamAlignment("AAAAAAATTCA", 1500, 1500, 30, true, cigar: new CigarAlignment("11M"));
            var realigned = evaluator.GetFinalAlignment(ungappedRead, out wasChanged, out wasSoftclipped,
                                                        out wasConfirmed, out wasSketchy);

            Assert.False(wasChanged);
            Assert.False(wasConfirmed);

            // One known 2-base insertion: the ungapped read gets realigned around it.
            var singleInsertion = new List<HashableIndel>()
            {
                new HashableIndel()
                {
                    Chromosome        = "chr1",
                    ReferencePosition = 1506,
                    ReferenceAllele   = "A",
                    AlternateAllele   = "ATT",
                    Type   = AlleleCategory.Insertion,
                    Length = 2
                }
            };
            var singleInsertionSource = new ChromosomeIndelSource(singleInsertion, genomeSource.Object);

            evaluator = NewEvaluator(singleInsertionSource);
            realigned = evaluator.GetFinalAlignment(ungappedRead, out wasChanged, out wasSoftclipped,
                                                    out wasConfirmed, out wasSketchy);
            Assert.True(wasChanged);
            Assert.False(wasConfirmed);
            Assert.Equal("7M2I2M", realigned.CigarData.ToString());

            // Re-evaluating the realigned read: unchanged, but now confirmed.
            var acceptedIndels = new List<HashableIndel>();

            evaluator = NewEvaluator(singleInsertionSource);
            var reRealigned = evaluator.GetFinalAlignment(realigned, out wasChanged, out wasSoftclipped,
                                                          out wasConfirmed, out wasSketchy,
                                                          confirmedAccepteds: acceptedIndels);

            Assert.False(wasChanged);
            Assert.True(wasConfirmed);
            Assert.Equal("7M2I2M", reRealigned.CigarData.ToString());

            // Existing indel is best (and only)
            evaluator   = NewEvaluator(singleInsertionSource);
            reRealigned = evaluator.GetFinalAlignment(realigned, out wasChanged, out wasSoftclipped,
                                                      out wasConfirmed, out wasSketchy,
                                                      confirmedAccepteds: acceptedIndels,
                                                      existingIndels: ExistingInsertion(1506, "ATT"));
            Assert.False(wasChanged);
            Assert.True(wasConfirmed);
            Assert.Equal("7M2I2M", reRealigned.CigarData.ToString());

            // Existing indel is unsanctioned but good fit - keep it
            var readWithInsertion =
                TestHelpers.CreateBamAlignment("AAAAAAATTCA", 1500, 1500, 30, true, cigar: new CigarAlignment("7M3I1M"));

            evaluator = NewEvaluator(singleInsertionSource, softclipUnknownIndels: false);

            var realignedExistingIns = evaluator.GetFinalAlignment(readWithInsertion, out wasChanged, out wasSoftclipped,
                                                                   out wasConfirmed, out wasSketchy,
                                                                   confirmedAccepteds: acceptedIndels,
                                                                   existingIndels: ExistingInsertion(1506, "ATTC"));

            Assert.False(wasChanged);
            Assert.False(wasConfirmed);
            Assert.Equal("7M3I1M", realignedExistingIns.CigarData.ToString());

            // Existing indel is unsanctioned and we're softclipping unknowns - softclip it
            evaluator = NewEvaluator(singleInsertionSource);

            realignedExistingIns = evaluator.GetFinalAlignment(readWithInsertion, out wasChanged, out wasSoftclipped,
                                                               out wasConfirmed, out wasSketchy,
                                                               confirmedAccepteds: acceptedIndels,
                                                               existingIndels: ExistingInsertion(1506, "ATTC"));
            Assert.False(wasChanged);
            Assert.False(wasConfirmed);
            Assert.Equal("7M4S", realignedExistingIns.CigarData.ToString());

            // Three candidate insertions at the same position with different scores.
            var scoredInsertions = new List<HashableIndel>()
            {
                new HashableIndel()
                {
                    Chromosome        = "chr1",
                    ReferencePosition = 1506,
                    ReferenceAllele   = "A",
                    AlternateAllele   = "ATT",
                    Type   = AlleleCategory.Insertion,
                    Length = 2,
                    Score  = 1000
                },
                new HashableIndel()
                {
                    Chromosome        = "chr1",
                    ReferencePosition = 1506,
                    ReferenceAllele   = "A",
                    AlternateAllele   = "ATTC",
                    Type   = AlleleCategory.Insertion,
                    Length = 3,
                    Score  = 760
                },
                new HashableIndel()
                {
                    Chromosome        = "chr1",
                    ReferencePosition = 1506,
                    ReferenceAllele   = "A",
                    AlternateAllele   = "ATTG",
                    Type   = AlleleCategory.Insertion,
                    Length = 3,
                    Score  = 10
                }
            };
            var scoredSource = new ChromosomeIndelSource(scoredInsertions, genomeSource.Object);

            evaluator = NewEvaluator(scoredSource);
            realigned = evaluator.GetFinalAlignment(ungappedRead, out wasChanged, out wasSoftclipped,
                                                    out wasConfirmed, out wasSketchy);
            Assert.True(wasChanged);
            Assert.False(wasConfirmed);
            Assert.Equal("7M3I1M", realigned.CigarData.ToString());

            // Re-evaluating that result with a fresh accepted list: unchanged and confirmed.
            acceptedIndels = new List<HashableIndel>();
            evaluator      = NewEvaluator(scoredSource);
            reRealigned    = evaluator.GetFinalAlignment(realigned, out wasChanged, out wasSoftclipped,
                                                         out wasConfirmed, out wasSketchy,
                                                         confirmedAccepteds: acceptedIndels);
            Assert.False(wasChanged);
            Assert.True(wasConfirmed);
            Assert.Equal("7M3I1M", reRealigned.CigarData.ToString());

            // Existing indel is not the top one but is the best fit, keep it
            evaluator   = NewEvaluator(scoredSource);
            reRealigned = evaluator.GetFinalAlignment(realigned, out wasChanged, out wasSoftclipped,
                                                      out wasConfirmed, out wasSketchy,
                                                      confirmedAccepteds: acceptedIndels,
                                                      existingIndels: ExistingInsertion(1506, "ATTC"));
            Assert.False(wasChanged);
            Assert.True(wasConfirmed);
            Assert.Equal("7M3I1M", reRealigned.CigarData.ToString());

            // Has existing unsanctioned indel and there are better ones to realign around - ignore the bad one, take the good
            evaluator   = NewEvaluator(scoredSource);
            reRealigned = evaluator.GetFinalAlignment(realigned, out wasChanged, out wasSoftclipped,
                                                      out wasConfirmed, out wasSketchy,
                                                      confirmedAccepteds: acceptedIndels,
                                                      existingIndels: ExistingInsertion(1507, "ATC"));
            Assert.False(wasChanged);
            Assert.True(wasConfirmed);
            Assert.Equal("7M3I1M", reRealigned.CigarData.ToString());
        }
        // Example #3 (0)
        /// <summary>
        /// Checks how many indels <c>ChromosomeIndelSource.GetRelevantIndels</c> returns for a series of query
        /// positions. The source is loaded with eight chr1 insertions at positions 0, 8002, 9752, 9800,
        /// 10002 (twice), 10251 and 21000. The expected counts match a window of 250 bases on either side
        /// of the query position, inclusive at exactly 250; the window itself is implemented outside this
        /// test, so that figure is taken from the expectations below.
        /// </summary>
        public void GetRelevantIndels()
        {
            // Builds a chr1 insertion with reference allele "A"; defaults cover the common "AGT" case.
            HashableIndel InsertionAt(int referencePosition, string alternateAllele = "AGT", int length = 2, int score = 10)
            {
                return new HashableIndel()
                {
                    AlternateAllele   = alternateAllele,
                    ReferenceAllele   = "A",
                    Chromosome        = "chr1",
                    Length            = length,
                    ReferencePosition = referencePosition,
                    Score             = score,
                    Type = AlleleCategory.Insertion
                };
            }

            var indel  = InsertionAt(10002, alternateAllele: "AG", length: 1, score: 1);
            var indel2 = InsertionAt(10002);
            var positionWayLower = InsertionAt(8002);
            var positionLikelyDiffBlockButWithinRange = InsertionAt(9800);
            var positionWayHigher = InsertionAt(21000);
            var borderCaseHigh    = InsertionAt(10251);
            var borderCaseLow     = InsertionAt(9752);
            var indelAt0          = InsertionAt(0);

            var indels = new List<HashableIndel>()
            {
                indel,
                indel2,
                positionWayLower,
                positionLikelyDiffBlockButWithinRange,
                positionWayHigher,
                borderCaseHigh,
                borderCaseLow,
                indelAt0
            };

            // The snippet source returns the same 2000-base all-'A' snippet for any position.
            var snippetSource = new Mock<IGenomeSnippetSource>();

            snippetSource.Setup(s => s.GetGenomeSnippet(It.IsAny<int>())).Returns(new GenomeSnippet()
            {
                Chromosome = "chr1", Sequence = new string('A', 2000), StartPosition = 1
            });
            var indelSource = new ChromosomeIndelSource(indels, snippetSource.Object);

            // Should get indel1 and 2, border high (249 away), border low (250 away), within range
            var relevant = indelSource.GetRelevantIndels(10002);
            Assert.Equal(5, relevant.Count());

            // Should get indel1 and 2, border low, within range, but not border high (now > 250 away)
            relevant = indelSource.GetRelevantIndels(10000);
            Assert.Equal(4, relevant.Count());

            // Should get all 5 as 10002 did, showing that it is 250 inclusive (border high is exactly 250 away)
            relevant = indelSource.GetRelevantIndels(10001);
            Assert.Equal(5, relevant.Count());

            // Should get the 9752 and the 9800
            relevant = indelSource.GetRelevantIndels(9700);
            Assert.Equal(2, relevant.Count());

            // Not close enough to anything (nearest is 9752, 252 away)
            relevant = indelSource.GetRelevantIndels(9500);
            Assert.Equal(0, relevant.Count());

            // Only the indel at position 0
            relevant = indelSource.GetRelevantIndels(0);
            Assert.Equal(1, relevant.Count());

            // Far beyond every indel
            relevant = indelSource.GetRelevantIndels(100000);
            Assert.Equal(0, relevant.Count());
        }