/// <summary>
/// Builds a <c>ReadPairRealignerAndCombiner</c> around a real realigner/evaluator (with mocked
/// genome snippet source, status handler and region filterer) and extracts the reads for
/// <paramref name="pair"/>.
/// </summary>
/// <param name="pair">Pair result handed to the combiner's <c>ExtractReads</c>.</param>
/// <param name="refSeq">Reference bases placed directly after the leading 'A' padding.</param>
/// <param name="refSeqOffset">Number of 'A' bases prepended to <paramref name="refSeq"/>.</param>
/// <param name="preIndels">Candidate indels, converted to hashable indels for the indel source.</param>
/// <param name="hasExistingIndels">Forwarded unchanged to the combiner.</param>
/// <returns>Whatever the combiner's <c>ExtractReads</c> returns for the pair.</returns>
private static List<BamAlignment> ExtractReadsFromRealignerAndCombiner(PairResult pair, string refSeq, int refSeqOffset, List<PreIndel> preIndels, bool hasExistingIndels = false)
{
    const string chromosome = "chr1";

    var chromosomeNames = new Dictionary<int, string>() { { 1, chromosome } };
    var pairHandler = new PairHandler(chromosomeNames, new BasicStitcher(0), tryStitch: true);

    // Reference = <refSeqOffset x 'A'> + refSeq + <1000 x 'T'>, with the snippet starting at position 0.
    var snippetSourceMock = new Mock<IGenomeSnippetSource>();
    var referenceSnippet = new GenomeSnippet()
    {
        Chromosome = chromosome,
        Sequence = new string('A', refSeqOffset) + refSeq + new string('T', 1000),
        StartPosition = 0
    };
    snippetSourceMock
        .Setup(source => source.GetGenomeSnippet(It.IsAny<int>()))
        .Returns(referenceSnippet);

    var statusHandlerMock = new Mock<IStatusHandler>();
    var alignmentComparer = new GemBasicAlignmentComparer(false, false);

    // TODO (carried over from the original author): "fix" - what needed fixing here was never recorded.
    var realigner = new GeminiReadRealigner(
        alignmentComparer,
        remaskSoftclips: false,
        keepProbeSoftclips: false,
        keepBothSideSoftclips: false,
        trackActualMismatches: false,
        checkSoftclipsForMismatches: true,
        debug: false,
        maskNsOnly: false,
        maskPartialInsertion: false,
        minimumUnanchoredInsertionLength: 1,
        minInsertionSizeToAllowMismatchingBases: 4,
        maxProportionInsertSequenceMismatch: 0.2);

    // The region filterer mock always answers "yes" to AnyIndelsNearby.
    var regionFiltererMock = new Mock<IRegionFilterer>();
    regionFiltererMock
        .Setup(regionFilterer => regionFilterer.AnyIndelsNearby(It.IsAny<int>()))
        .Returns(true);

    var hashableIndels =
        (from preIndel in preIndels
         select HashableIndelSource.GetHashableIndel(referenceSnippet, preIndel, 0, false)).ToList();
    var chromosomeIndelSource = new ChromosomeIndelSource(hashableIndels, snippetSourceMock.Object);

    var evaluator = new RealignmentEvaluator(
        chromosomeIndelSource,
        statusHandlerMock.Object,
        realigner,
        new RealignmentJudger(alignmentComparer),
        chromosome,
        false,
        true,
        true,
        true,
        regionFiltererMock.Object,
        false);

    var evidenceCollector = new NonSnowballEvidenceCollector();
    var postRealignmentStitcher = new PostRealignmentStitcher(
        pairHandler,
        new DebugStatusHandler(new ReadStatusCounter()));
    var realignerAndCombiner = new ReadPairRealignerAndCombiner(
        evidenceCollector,
        postRealignmentStitcher,
        evaluator,
        new PairSpecificIndelFinder(),
        chromosome,
        false,
        hasExistingIndels: hasExistingIndels);

    var nmCalculator = new NmCalculator(snippetSourceMock.Object);
    return realignerAndCombiner.ExtractReads(pair, nmCalculator);
}
/// <summary>
/// Exercises <c>RealignmentEvaluator.GetFinalAlignment</c> with a real realigner, judger and indel
/// source (only the snippet source, status handler and region filterer are mocked), walking through
/// scenarios with no known indels, one known insertion, and several competing insertions.
/// </summary>
public void GetFinalAlignment_NonMock()
{
    const string chromosome = "chr1";
    const string readBases = "AAAAAAATTCA";

    // Reference = 1000 x 'A' + "ATCGATTGA" + 1000 x 'T', with the snippet starting at position 1000.
    var snippetSourceMock = new Mock<IGenomeSnippetSource>();
    var referenceSnippet = new GenomeSnippet()
    {
        Chromosome = chromosome,
        Sequence = new string('A', 1000) + "ATCGATTGA" + new string('T', 1000),
        StartPosition = 1000
    };
    snippetSourceMock
        .Setup(source => source.GetGenomeSnippet(It.IsAny<int>()))
        .Returns(referenceSnippet);

    var statusHandlerMock = new Mock<IStatusHandler>();
    var alignmentComparer = new GemBasicAlignmentComparer(false, false);

    // TODO (carried over from the original author): "fix" - what needed fixing here was never recorded.
    var realigner = new GeminiReadRealigner(
        alignmentComparer,
        remaskSoftclips: false,
        keepProbeSoftclips: false,
        keepBothSideSoftclips: false,
        trackActualMismatches: false,
        checkSoftclipsForMismatches: true,
        debug: false,
        maskNsOnly: false,
        maskPartialInsertion: false,
        minimumUnanchoredInsertionLength: 1,
        minInsertionSizeToAllowMismatchingBases: 4,
        maxProportionInsertSequenceMismatch: 0.2);
    var regionFilterer = GetMockRegionFilterer();

    // Every scenario gets a brand-new evaluator. Only the fourth boolean ever varies between scenarios;
    // judging by the scenario comments it presumably controls softclipping of unknown (unsanctioned)
    // indels - confirm against RealignmentEvaluator's constructor.
    RealignmentEvaluator NewEvaluator(ChromosomeIndelSource source, bool softclipUnknowns = true) =>
        new RealignmentEvaluator(
            source,
            statusHandlerMock.Object,
            realigner,
            new RealignmentJudger(alignmentComparer),
            chromosome,
            false,
            true,
            true,
            softclipUnknowns,
            regionFilterer.Object,
            false);

    // A single pre-existing insertion after reference base "A" at the given position.
    List<PreIndel> ExistingInsertion(int position, string alternate) =>
        new List<PreIndel>()
        {
            new PreIndel(new CandidateAllele(chromosome, position, "A", alternate, AlleleCategory.Insertion))
        };

    // ---- No indels ----
    var emptyIndelSource = new ChromosomeIndelSource(new List<HashableIndel>(), snippetSourceMock.Object);
    var evaluator = NewEvaluator(emptyIndelSource);

    var ungappedAlignment = TestHelpers.CreateBamAlignment(readBases, 1500, 1500, 30, true,
        cigar: new CigarAlignment("11M"));

    var unchangedResult = evaluator.GetFinalAlignment(ungappedAlignment,
        out bool wasChanged, out bool softclipForced, out bool wasConfirmed, out bool wasSketchy);
    Assert.False(wasChanged);
    Assert.False(wasConfirmed);

    // ---- One known 2-base insertion ----
    var singleInsertion = new List<HashableIndel>()
    {
        new HashableIndel()
        {
            Chromosome = chromosome,
            ReferencePosition = 1506,
            ReferenceAllele = "A",
            AlternateAllele = "ATT",
            Type = AlleleCategory.Insertion,
            Length = 2
        }
    };
    var singleInsertionSource = new ChromosomeIndelSource(singleInsertion, snippetSourceMock.Object);

    evaluator = NewEvaluator(singleInsertionSource);
    var realignedTwoBase = evaluator.GetFinalAlignment(ungappedAlignment,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy);
    Assert.True(wasChanged);
    Assert.False(wasConfirmed);
    Assert.Equal("7M2I2M", realignedTwoBase.CigarData.ToString());

    // This list instance is shared by all remaining single-insertion scenarios.
    var confirmedForSingle = new List<HashableIndel>();

    evaluator = NewEvaluator(singleInsertionSource);
    var secondPass = evaluator.GetFinalAlignment(realignedTwoBase,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy,
        confirmedAccepteds: confirmedForSingle);
    Assert.False(wasChanged);
    Assert.True(wasConfirmed);
    Assert.Equal("7M2I2M", secondPass.CigarData.ToString());

    // Existing indel is best (and only)
    evaluator = NewEvaluator(singleInsertionSource);
    secondPass = evaluator.GetFinalAlignment(realignedTwoBase,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy,
        confirmedAccepteds: confirmedForSingle,
        existingIndels: ExistingInsertion(1506, "ATT"));
    Assert.False(wasChanged);
    Assert.True(wasConfirmed);
    Assert.Equal("7M2I2M", secondPass.CigarData.ToString());

    // Existing indel is unsanctioned but good fit - keep it
    var threeBaseInsertionAlignment = TestHelpers.CreateBamAlignment(readBases, 1500, 1500, 30, true,
        cigar: new CigarAlignment("7M3I1M"));

    evaluator = NewEvaluator(singleInsertionSource, softclipUnknowns: false);
    var unsanctionedResult = evaluator.GetFinalAlignment(threeBaseInsertionAlignment,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy,
        confirmedAccepteds: confirmedForSingle,
        existingIndels: ExistingInsertion(1506, "ATTC"));
    Assert.False(wasChanged);
    Assert.False(wasConfirmed);
    Assert.Equal("7M3I1M", unsanctionedResult.CigarData.ToString());

    // Existing indel is unsanctioned and we're softclipping unknowns - softclip it
    evaluator = NewEvaluator(singleInsertionSource);
    unsanctionedResult = evaluator.GetFinalAlignment(threeBaseInsertionAlignment,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy,
        confirmedAccepteds: confirmedForSingle,
        existingIndels: ExistingInsertion(1506, "ATTC"));
    Assert.False(wasChanged);
    Assert.False(wasConfirmed);
    Assert.Equal("7M4S", unsanctionedResult.CigarData.ToString());

    // ---- Three competing insertions at the same position, with different scores ----
    var competingInsertions = new List<HashableIndel>()
    {
        new HashableIndel()
        {
            Chromosome = chromosome,
            ReferencePosition = 1506,
            ReferenceAllele = "A",
            AlternateAllele = "ATT",
            Type = AlleleCategory.Insertion,
            Length = 2,
            Score = 1000
        },
        new HashableIndel()
        {
            Chromosome = chromosome,
            ReferencePosition = 1506,
            ReferenceAllele = "A",
            AlternateAllele = "ATTC",
            Type = AlleleCategory.Insertion,
            Length = 3,
            Score = 760
        },
        new HashableIndel()
        {
            Chromosome = chromosome,
            ReferencePosition = 1506,
            ReferenceAllele = "A",
            AlternateAllele = "ATTG",
            Type = AlleleCategory.Insertion,
            Length = 3,
            Score = 10
        }
    };
    var competingSource = new ChromosomeIndelSource(competingInsertions, snippetSourceMock.Object);

    evaluator = NewEvaluator(competingSource);
    var realignedThreeBase = evaluator.GetFinalAlignment(ungappedAlignment,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy);
    Assert.True(wasChanged);
    Assert.False(wasConfirmed);
    Assert.Equal("7M3I1M", realignedThreeBase.CigarData.ToString());

    // A fresh list, shared by all remaining competing-insertion scenarios.
    var confirmedForCompeting = new List<HashableIndel>();

    evaluator = NewEvaluator(competingSource);
    secondPass = evaluator.GetFinalAlignment(realignedThreeBase,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy,
        confirmedAccepteds: confirmedForCompeting);
    Assert.False(wasChanged);
    Assert.True(wasConfirmed);
    Assert.Equal("7M3I1M", secondPass.CigarData.ToString());

    // Existing indel is not the top one but is the best fit, keep it
    evaluator = NewEvaluator(competingSource);
    secondPass = evaluator.GetFinalAlignment(realignedThreeBase,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy,
        confirmedAccepteds: confirmedForCompeting,
        existingIndels: ExistingInsertion(1506, "ATTC"));
    Assert.False(wasChanged);
    Assert.True(wasConfirmed);
    Assert.Equal("7M3I1M", secondPass.CigarData.ToString());

    // Has existing unsanctioned indel and there are better ones to realign around - ignore the bad one, take the good
    evaluator = NewEvaluator(competingSource);
    secondPass = evaluator.GetFinalAlignment(realignedThreeBase,
        out wasChanged, out softclipForced, out wasConfirmed, out wasSketchy,
        confirmedAccepteds: confirmedForCompeting,
        existingIndels: ExistingInsertion(1507, "ATC"));
    Assert.False(wasChanged);
    Assert.True(wasConfirmed);
    Assert.Equal("7M3I1M", secondPass.CigarData.ToString());
}
/// <summary>
/// Verifies which indels <c>ChromosomeIndelSource.GetRelevantIndels</c> returns for a range of query
/// positions. The expected counts encode the rule this test pins down: an indel is returned when it
/// lies within 250 bases of the query position, inclusive.
/// </summary>
public void GetRelevantIndels()
{
    var indel = new HashableIndel()
    {
        AlternateAllele = "AG",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 1,
        ReferencePosition = 10002,
        Score = 1,
        Type = AlleleCategory.Insertion
    };
    var indel2 = new HashableIndel()
    {
        AlternateAllele = "AGT",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 2,
        ReferencePosition = 10002,
        Score = 10,
        Type = AlleleCategory.Insertion
    };
    var positionWayLower = new HashableIndel()
    {
        AlternateAllele = "AGT",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 2,
        ReferencePosition = 8002,
        Score = 10,
        Type = AlleleCategory.Insertion
    };
    var positionLikelyDiffBlockButWithinRange = new HashableIndel()
    {
        AlternateAllele = "AGT",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 2,
        ReferencePosition = 9800,
        Score = 10,
        Type = AlleleCategory.Insertion
    };
    var positionWayHigher = new HashableIndel()
    {
        AlternateAllele = "AGT",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 2,
        ReferencePosition = 21000,
        Score = 10,
        Type = AlleleCategory.Insertion
    };
    var borderCaseHigh = new HashableIndel()
    {
        AlternateAllele = "AGT",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 2,
        ReferencePosition = 10251,
        Score = 10,
        Type = AlleleCategory.Insertion
    };
    var borderCaseLow = new HashableIndel()
    {
        AlternateAllele = "AGT",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 2,
        ReferencePosition = 9752,
        Score = 10,
        Type = AlleleCategory.Insertion
    };
    var indelAt0 = new HashableIndel()
    {
        AlternateAllele = "AGT",
        ReferenceAllele = "A",
        Chromosome = "chr1",
        Length = 2,
        ReferencePosition = 0,
        Score = 10,
        Type = AlleleCategory.Insertion
    };

    var indels = new List<HashableIndel>()
    {
        indel,
        indel2,
        positionWayLower,
        positionLikelyDiffBlockButWithinRange,
        positionWayHigher,
        borderCaseHigh,
        borderCaseLow,
        indelAt0
    };

    var snippetSource = new Mock<IGenomeSnippetSource>();
    snippetSource
        .Setup(s => s.GetGenomeSnippet(It.IsAny<int>()))
        .Returns(new GenomeSnippet()
        {
            Chromosome = "chr1",
            Sequence = new string('A', 2000),
            StartPosition = 1
        });

    var indelSource = new ChromosomeIndelSource(indels, snippetSource.Object);

    // Should get indel1 and 2, border high, border low, within range
    var relevant = indelSource.GetRelevantIndels(10002);
    Assert.Equal(5, relevant.Count());

    // Should get indel1 and 2, border low, within range, but not border high (now > 250 away)
    relevant = indelSource.GetRelevantIndels(10000);
    Assert.Equal(4, relevant.Count());

    // Should get all 5 as 10002 did, showing that it is 250 inclusive
    relevant = indelSource.GetRelevantIndels(10001);
    Assert.Equal(5, relevant.Count());

    // Should get the 9752 and the 9800
    relevant = indelSource.GetRelevantIndels(9700);
    Assert.Equal(2, relevant.Count());

    // Not close enough to anything (nearest indel, 9752, is 252 away)
    relevant = indelSource.GetRelevantIndels(9500);
    Assert.Empty(relevant);

    // Only the indel sitting at position 0 is in range
    relevant = indelSource.GetRelevantIndels(0);
    Assert.Single(relevant);

    // Far beyond every indel in the source
    relevant = indelSource.GetRelevantIndels(100000);
    Assert.Empty(relevant);
}