/// <summary>
/// Test helper that assembles a <see cref="ReadPairRealignerAndCombiner"/> around mocked
/// genome-snippet, status-handler and region-filter dependencies and runs
/// <c>ExtractReads</c> on the supplied pair.
/// </summary>
/// <param name="pair">Pair result handed to the combiner.</param>
/// <param name="refSeq">Reference bases placed after the leading 'A' padding in the mocked snippet.</param>
/// <param name="refSeqOffset">Number of 'A' bases prepended to <paramref name="refSeq"/>.</param>
/// <param name="preIndels">Candidate indels, converted to hashable indels against the mocked snippet.</param>
/// <param name="hasExistingIndels">Forwarded to the combiner's <c>hasExistingIndels</c> argument.</param>
/// <returns>Whatever the combiner's <c>ExtractReads</c> returns for <paramref name="pair"/>.</returns>
private static List<BamAlignment> ExtractReadsFromRealignerAndCombiner(
    PairResult pair,
    string refSeq,
    int refSeqOffset,
    List<PreIndel> preIndels,
    bool hasExistingIndels = false)
{
    const string chromosome = "chr1";

    var refIdMapping = new Dictionary<int, string>() { { 1, chromosome } };
    var pairHandler = new PairHandler(refIdMapping, new BasicStitcher(0), tryStitch: true);

    // Every snippet request is answered with the same synthetic reference:
    // refSeqOffset 'A's, then refSeq, then 1000 'T's, starting at position 0.
    var snippetSourceMock = new Mock<IGenomeSnippetSource>();
    var snippet = new GenomeSnippet()
    {
        Chromosome = chromosome,
        Sequence = new string('A', refSeqOffset) + refSeq + new string('T', 1000),
        StartPosition = 0
    };
    snippetSourceMock.Setup(x => x.GetGenomeSnippet(It.IsAny<int>())).Returns(snippet);

    var statusHandlerMock = new Mock<IStatusHandler>();
    var alignmentComparer = new GemBasicAlignmentComparer(false, false);

    // TODO: an earlier "fix" note was left on this realigner set-up without details;
    // work out what needed fixing here.
    var realigner = new GeminiReadRealigner(
        alignmentComparer,
        remaskSoftclips: false,
        keepProbeSoftclips: false,
        keepBothSideSoftclips: false,
        trackActualMismatches: false,
        checkSoftclipsForMismatches: true,
        debug: false,
        maskNsOnly: false,
        maskPartialInsertion: false,
        minimumUnanchoredInsertionLength: 1,
        minInsertionSizeToAllowMismatchingBases: 4,
        maxProportionInsertSequenceMismatch: 0.2);

    // The region filter mock always reports that indels are nearby.
    var regionFiltererMock = new Mock<IRegionFilterer>();
    regionFiltererMock.Setup(x => x.AnyIndelsNearby(It.IsAny<int>())).Returns(true);

    var hashableIndels = preIndels
        .Select(preIndel => HashableIndelSource.GetHashableIndel(snippet, preIndel, 0, false))
        .ToList();
    var chromosomeIndelSource = new ChromosomeIndelSource(hashableIndels, snippetSourceMock.Object);

    var evaluator = new RealignmentEvaluator(
        chromosomeIndelSource,
        statusHandlerMock.Object,
        realigner,
        new RealignmentJudger(alignmentComparer),
        chromosome,
        false,
        true,
        true,
        true,
        regionFiltererMock.Object,
        false);

    var evidenceCollector = new NonSnowballEvidenceCollector();
    var restitcher = new PostRealignmentStitcher(
        pairHandler,
        new DebugStatusHandler(new ReadStatusCounter()));
    var realignerAndCombiner = new ReadPairRealignerAndCombiner(
        evidenceCollector,
        restitcher,
        evaluator,
        new PairSpecificIndelFinder(),
        chromosome,
        false,
        hasExistingIndels: hasExistingIndels);

    var nmCalculator = new NmCalculator(snippetSourceMock.Object);
    return realignerAndCombiner.ExtractReads(pair, nmCalculator);
}
/// <summary>
/// Checks the NM values reported by <see cref="NmCalculator"/> for reads placed at
/// position 100 against a fixed mocked reference snippet, covering plain matches and
/// mismatches as well as insertion, deletion and soft-clip CIGARs.
/// </summary>
public void GetNm()
{
    var referenceSnippet = new GenomeSnippet()
    {
        Chromosome = "chr1",
        Sequence = "NNNNNAAAAATTTTTGGGGGCCCCC",
        StartPosition = 94 // 0 based
    };

    var snippetSourceMock = new Mock<IGenomeSnippetSource>();
    snippetSourceMock.Setup(x => x.GetGenomeSnippet(It.IsAny<int>())).Returns(referenceSnippet);

    var nmCalculator = new NmCalculator(snippetSourceMock.Object);

    // Positions passed to CreateBamAlignment are one based because the helper adjusts by one.

    // Read identical to the reference: nothing to count.
    var exactMatch = TestHelpers.CreateBamAlignment("AAAAA", 100, 0, 30, true);
    Assert.Equal(0, nmCalculator.GetNm(exactMatch));

    // One differing base in the middle of the read.
    var oneMismatch = TestHelpers.CreateBamAlignment("AATAA", 100, 0, 30, true);
    Assert.Equal(1, nmCalculator.GetNm(oneMismatch));

    // Four differing bases.
    var fourMismatches = TestHelpers.CreateBamAlignment("AGTGT", 100, 0, 30, true);
    Assert.Equal(4, nmCalculator.GetNm(fourMismatches));

    // Same bases, but aligned as one match followed by a four-base insertion.
    var withInsertion = TestHelpers.CreateBamAlignment("AGTGT", 100, 0, 30, true,
        cigar: new CigarAlignment("1M4I"));
    Assert.Equal(4, nmCalculator.GetNm(withInsertion));

    // Four-base deletion; expected NM equals the deletion length.
    var withDeletion = TestHelpers.CreateBamAlignment("ATTTT", 100, 0, 30, true,
        cigar: new CigarAlignment("1M4D4M"));
    Assert.Equal(4, nmCalculator.GetNm(withDeletion));

    // Same deletion CIGAR with different trailing bases; expected NM doubles to 8.
    var deletionPlusMismatches = TestHelpers.CreateBamAlignment("ACCCC", 100, 0, 30, true,
        cigar: new CigarAlignment("1M4D4M"));
    Assert.Equal(8, nmCalculator.GetNm(deletionPlusMismatches));

    // Differing base at the very first position of the read.
    var leadingMismatch = TestHelpers.CreateBamAlignment("GAAAA", 100, 0, 30, true);
    Assert.Equal(1, nmCalculator.GetNm(leadingMismatch));

    // The differing base sits in the soft-clipped tail; expected NM is 0.
    var mismatchInSoftclip = TestHelpers.CreateBamAlignment("AATAA", 100, 0, 30, true,
        cigar: new CigarAlignment("2M3S"));
    Assert.Equal(0, nmCalculator.GetNm(mismatchInSoftclip));
}
/// <summary>
/// Processes every pair result of one classification: optionally realigns and/or
/// stitches each pair through a realign handler, tags the resulting alignments with the
/// read treatment ("XT") and classification ("XP"), and returns them all in one list.
/// </summary>
/// <param name="categoriesForRealignment">Classifications that are eligible for realignment.</param>
/// <param name="indelSource">Indel source passed to the realign handler factory.</param>
/// <param name="shouldRealignAtAll">When false, no pair is realigned (the handler is still used to extract reads).</param>
/// <param name="outcomesLookup">Per-indel outcome table passed to the realign handler factory.</param>
/// <param name="numSkippedDueToSites">Incremented for each pair that goes through the handler without being selected for realignment.</param>
/// <param name="numKept">Incremented for each pair that is selected for realignment.</param>
/// <param name="numRealigned">Incremented for each pair whose read pair reports Realigned, RealignedR1 or RealignedR2 after extraction.</param>
/// <param name="numSilenced">Incremented for each pair for which <c>ReadsToSilence</c> returns a positive value.</param>
/// <param name="pairResults">Pairs to process. The list is cleared before this method returns.</param>
/// <param name="classification">Classification shared by all pairs in <paramref name="pairResults"/>.</param>
/// <param name="binEvidence">Not referenced by this method body.</param>
/// <param name="progressTracker">Counter keyed by "classification:readTreatment", incremented once per pair.</param>
/// <param name="binConclusions">Passed to <c>ReadsToSilence</c>.</param>
/// <param name="usableBins">Used to check whether a pair's min or max position is usable for realignment.</param>
/// <param name="startPosition">Not referenced by this method body.</param>
/// <param name="endPosition">Not referenced by this method body.</param>
/// <returns>All alignments produced for the processed pairs, in processing order.</returns>
private List<BamAlignment> ProcessCategory(
    List<PairClassification> categoriesForRealignment,
    IChromosomeIndelSource indelSource,
    bool shouldRealignAtAll,
    Dictionary<HashableIndel, int[]> outcomesLookup,
    ref int numSkippedDueToSites,
    ref int numKept,
    ref int numRealigned,
    ref int numSilenced,
    List<PairResult> pairResults,
    PairClassification classification,
    IBinEvidence binEvidence,
    ConcurrentDictionary<string, int> progressTracker,
    BinConclusions binConclusions,
    UsableBins usableBins,
    int startPosition,
    int endPosition)
{
    var allAlignments = new List<BamAlignment>();

    // Certain classifications are always realigned when the option is switched on,
    // regardless of whether their positions fall in usable bins.
    var isHighLikelihoodForRealign = false;
    if (_geminiOptions.ForceHighLikelihoodRealigners)
    {
        var highLikelihoodCategories = new List<PairClassification>()
        {
            PairClassification.Disagree,
            PairClassification.MessyStitched,
            PairClassification.MessySplit,
            PairClassification.UnstitchMessy,
            PairClassification.UnstitchIndel
        };
        isHighLikelihoodForRealign = highLikelihoodCategories.Contains(classification);
    }

    var doRealign = false;
    ReadPairRealignerAndCombiner realignHandler = null;
    var alreadyStitched = ClassificationIsStitched(classification);
    var doStitch = !_geminiOptions.SkipStitching &&
                   TypeClassifier.ClassificationIsStitchable(classification);
    var categoryIsRealignable = categoriesForRealignment.Contains(classification);

    // The handler is needed both for realignment and for stitching-only categories.
    if (categoryIsRealignable || doStitch)
    {
        doRealign = true;
        realignHandler = _bamRealignmentFactory.GetRealignPairHandler(
            doStitch,
            alreadyStitched,
            _realignmentOptions.PairAwareEverything || ClassificationIsPairAwareRealignable(classification),
            _refIdMapping,
            new ReadStatusCounter(),
            false,
            indelSource,
            _chrom,
            new Dictionary<string, IndelEvidence>(),
            ClassificationHasIndels(classification),
            outcomesLookup,
            SkipRestitchIfUnchanged(classification));
    }

    using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
    using (var singleSnippetSource = new ReusableSnippetSource(snippetSource))
    {
        var nmCalculator = new NmCalculator(singleSnippetSource);
        var classificationString = classification.ToString();

        foreach (var pairResult in pairResults)
        {
            int toSilence = 0;
            IEnumerable<BamAlignment> alignments;

            if (!doRealign)
            {
                // No handler for this category: pass the original alignments through.
                alignments = pairResult.Alignments;
            }
            else
            {
                bool doRealignPair =
                    shouldRealignAtAll &&
                    (isHighLikelihoodForRealign ||
                     (categoryIsRealignable &&
                      (usableBins.IsPositionUsable(pairResult.ReadPair.MinPosition) ||
                       usableBins.IsPositionUsable(pairResult.ReadPair.MaxPosition))));

                if (!doRealignPair)
                {
                    numSkippedDueToSites++;
                }
                else
                {
                    numKept++;
                }

                toSilence = ReadsToSilence(classification, binConclusions, pairResult);
                if (toSilence > 0)
                {
                    numSilenced++;
                }

                alignments = realignHandler.ExtractReads(pairResult, nmCalculator, doRealignPair, toSilence);

                if (pairResult.ReadPair.Realigned || pairResult.ReadPair.RealignedR1 ||
                    pairResult.ReadPair.RealignedR2)
                {
                    numRealigned++;
                }
            }

            // toSilence: 1 targets R1, 2 targets R2, 3 targets both. A read only counts
            // as silenced if it was not realigned.
            var silencedR1 = (toSilence == 1 || toSilence == 3) && !pairResult.ReadPair.RealignedR1;
            var silencedR2 = (toSilence == 2 || toSilence == 3) && !pairResult.ReadPair.RealignedR2;
            var readTreatment = ReadTreatment(silencedR1, silencedR2, pairResult);

            progressTracker.AddOrUpdate(
                classificationString + ":" + readTreatment,
                1,
                (key, currentCount) => currentCount + 1);

            // Materialize once so tagging and collection work on the same instances.
            var alignmentsList = alignments.ToList();
            foreach (var bamAlignment in alignmentsList)
            {
                if (_geminiOptions.LightDebug)
                {
                    AddMdTagCountsTags(bamAlignment, pairResult);
                }

                bamAlignment.ReplaceOrAddStringTag("XT", readTreatment);
                bamAlignment.ReplaceOrAddStringTag("XP", classificationString);
            }

            allAlignments.AddRange(alignmentsList);
        }
    }

    if (realignHandler != null)
    {
        realignHandler.Finish();
    }

    // The input list is emptied once its pairs have been processed.
    pairResults.Clear();

    return allAlignments;
}