/// <summary>
/// Exercises <c>ReusableSnippetSource.GetGenomeSnippet</c> against a mocked 60-base "chr1"
/// (10 x 'A' followed by 50 x 'B'): checks the returned chromosome, sequence and start position
/// at the chromosome edges and in the middle, checks when a previously returned snippet is
/// handed back again versus replaced, and checks that out-of-range positions throw.
/// </summary>
public void GetGenomeSnippet()
{
    const string chromosome = "chr1";
    var referenceSequence = new string('A', 10) + new string('B', 50);

    var genomeMock = new Mock<IGenome>();
    genomeMock
        .Setup(g => g.GetChrReference(chromosome))
        .Returns(new ChrReference() { Sequence = referenceSequence, Name = chromosome });

    var underlyingSource = new GenomeSnippetSource(chromosome, genomeMock.Object, 20, 0);
    var snippetSource = new ReusableSnippetSource(underlyingSource, 10);

    // Expected sequences that recur throughout the scenarios below.
    var leadingWindow = new string('A', 10) + new string('B', 30);
    var allBWindow = new string('B', 40);

    // Requests a snippet for the given position and checks chromosome, length, sequence and start.
    // The calls below are order-sensitive: each one may reuse the snippet fetched by an earlier call.
    Action<int, string, int> verifySnippet = (position, expectedSequence, expectedStart) =>
    {
        var current = snippetSource.GetGenomeSnippet(position);
        Assert.Equal(chromosome, current.Chromosome);
        Assert.Equal(expectedSequence.Length, current.Sequence.Length);
        Assert.Equal(expectedSequence, current.Sequence);
        Assert.Equal(expectedStart, current.StartPosition);
    };

    // Near the start of the chromosome: the snippet start is clamped to 0.
    // Kept inline (not via the helper) because the snippet itself is needed just below.
    var snippet = snippetSource.GetGenomeSnippet(5);
    Assert.Equal(chromosome, snippet.Chromosome);
    Assert.Equal(40, snippet.Sequence.Length);
    Assert.Equal(leadingWindow, snippet.Sequence);
    Assert.Equal(0, snippet.StartPosition);

    // Confirm proper indexing - demonstrates why we need to have 0 as min snippet startposition
    var pair = TestHelpers.GetPair("10M", "10M");
    pair.Read1.Bases = "AAAAAAAABB";
    pair.Read1.Position = 5;
    var read = new Read(chromosome, pair.Read1);
    var originalAlignmentSummary =
        Extensions.GetAlignmentSummary(read, snippet.Sequence, true, true, snippet.StartPosition);
    Assert.Equal(3, originalAlignmentSummary.NumMismatches);

    // Hitting up against the end of the chromosome
    verifySnippet(59, new string('B', 21), 39);

    // Very beginning of the chromosome
    verifySnippet(0, leadingWindow, 0);

    // Somewhere in the middle
    verifySnippet(30, allBWindow, 10);

    // Use the same snippet if we're close enough
    verifySnippet(31, allBWindow, 10);

    // Boundary: just within range to use the same snippet
    verifySnippet(39, allBWindow, 10);

    // Need to move to new snippet
    verifySnippet(40, allBWindow, 20);

    // Use the same snippet if we're close enough
    verifySnippet(39, allBWindow, 20);

    // Use the same snippet if we're close enough
    verifySnippet(35, allBWindow, 20);

    // Still answered with the snippet that starts at 20
    verifySnippet(31, allBWindow, 20);

    // One position further back the source switches to a snippet starting at 10
    verifySnippet(30, allBWindow, 10);

    // Off the end of the chromosome
    Assert.Throws<ArgumentException>(() => snippetSource.GetGenomeSnippet(81));

    // Shouldn't have negative position
    Assert.Throws<ArgumentException>(() => snippetSource.GetGenomeSnippet(-1));
}
/// <summary>
/// Processes every pair result belonging to one <paramref name="classification"/>: optionally
/// runs each pair through a realign/stitch handler, updates the running counters, records the
/// per-treatment tally in <paramref name="progressTracker"/>, tags each resulting alignment and
/// returns all alignments collected for the category.
/// </summary>
/// <param name="categoriesForRealignment">Classifications for which realignment is attempted.</param>
/// <param name="indelSource">Indel source handed to the realign handler factory.</param>
/// <param name="shouldRealignAtAll">When false, no pair is selected for realignment (pairs are still
/// passed through the handler if one was created).</param>
/// <param name="outcomesLookup">Lookup handed to the realign handler factory.</param>
/// <param name="numSkippedDueToSites">Incremented for each handler-processed pair that was not selected for realignment.</param>
/// <param name="numKept">Incremented for each handler-processed pair that was selected for realignment.</param>
/// <param name="numRealigned">Incremented for each pair whose read pair reports Realigned, RealignedR1 or RealignedR2 after extraction.</param>
/// <param name="numSilenced">Incremented for each pair for which <c>ReadsToSilence</c> returned a value greater than zero.</param>
/// <param name="pairResults">Pair results to process. The list is cleared before this method returns.</param>
/// <param name="classification">Classification shared by all entries of <paramref name="pairResults"/>; written to the "XP" tag.</param>
/// <param name="binEvidence">Not referenced by this method.</param>
/// <param name="progressTracker">Receives one count per pair under the key "classification:readTreatment".</param>
/// <param name="binConclusions">Passed to <c>ReadsToSilence</c>.</param>
/// <param name="usableBins">Queried with the pair's min and max position to decide whether a realignable pair is realigned.</param>
/// <param name="startPosition">Not referenced by this method.</param>
/// <param name="endPosition">Not referenced by this method.</param>
/// <returns>All alignments produced for the category, each tagged with "XT" (read treatment) and "XP" (classification).</returns>
private List<BamAlignment> ProcessCategory(
    List<PairClassification> categoriesForRealignment,
    IChromosomeIndelSource indelSource,
    bool shouldRealignAtAll,
    Dictionary<HashableIndel, int[]> outcomesLookup,
    ref int numSkippedDueToSites,
    ref int numKept,
    ref int numRealigned,
    ref int numSilenced,
    List<PairResult> pairResults,
    PairClassification classification,
    IBinEvidence binEvidence,
    ConcurrentDictionary<string, int> progressTracker,
    BinConclusions binConclusions,
    UsableBins usableBins,
    int startPosition,
    int endPosition)
{
    var allAlignments = new List<BamAlignment>();

    // With the force option on, these categories are realigned regardless of bin usability.
    var isHighLikelihoodForRealign = false;
    if (_geminiOptions.ForceHighLikelihoodRealigners)
    {
        var highLikelihoodCategories = new List<PairClassification>()
        {
            PairClassification.Disagree,
            PairClassification.MessyStitched,
            PairClassification.MessySplit,
            PairClassification.UnstitchMessy,
            PairClassification.UnstitchIndel
        };
        isHighLikelihoodForRealign = highLikelihoodCategories.Contains(classification);
    }

    var doRealign = false;
    ReadPairRealignerAndCombiner realignHandler = null;
    var alreadyStitched = ClassificationIsStitched(classification);
    var doStitch = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
    var categoryIsRealignable = categoriesForRealignment.Contains(classification);

    // A handler is needed whenever the category is either realignable or stitchable.
    if (categoryIsRealignable || doStitch)
    {
        doRealign = true;
        realignHandler = _bamRealignmentFactory.GetRealignPairHandler(
            doStitch,
            alreadyStitched,
            _realignmentOptions.PairAwareEverything || ClassificationIsPairAwareRealignable(classification),
            _refIdMapping,
            new ReadStatusCounter(),
            false,
            indelSource,
            _chrom,
            new Dictionary<string, IndelEvidence>(),
            ClassificationHasIndels(classification),
            outcomesLookup,
            SkipRestitchIfUnchanged(classification));
    }

    using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
    using (var singleSnippetSource = new ReusableSnippetSource(snippetSource))
    {
        var nmCalculator = new NmCalculator(singleSnippetSource);
        var classificationString = classification.ToString();

        foreach (var pairResult in pairResults)
        {
            int toSilence = 0;
            IEnumerable<BamAlignment> alignments;

            if (!doRealign)
            {
                // No handler for this category: pass the alignments through untouched.
                alignments = pairResult.Alignments;
            }
            else
            {
                // Realign only if globally enabled and either the category is forced, or it is
                // realignable and at least one end of the pair lies in a usable bin.
                bool doRealignPair =
                    shouldRealignAtAll &&
                    (isHighLikelihoodForRealign ||
                     (categoryIsRealignable &&
                      (usableBins.IsPositionUsable(pairResult.ReadPair.MinPosition) ||
                       usableBins.IsPositionUsable(pairResult.ReadPair.MaxPosition))));

                if (!doRealignPair)
                {
                    numSkippedDueToSites++;
                }
                else
                {
                    numKept++;
                }

                toSilence = ReadsToSilence(classification, binConclusions, pairResult);
                if (toSilence > 0)
                {
                    numSilenced++;
                }

                alignments = realignHandler.ExtractReads(pairResult, nmCalculator, doRealignPair, toSilence);

                if (pairResult.ReadPair.Realigned || pairResult.ReadPair.RealignedR1 ||
                    pairResult.ReadPair.RealignedR2)
                {
                    numRealigned++;
                }
            }

            // toSilence: 1 = read 1, 2 = read 2, 3 = both. A read that ended up realigned is
            // not reported as silenced.
            var silencedR1 = (toSilence == 1 || toSilence == 3) && !pairResult.ReadPair.RealignedR1;
            var silencedR2 = (toSilence == 2 || toSilence == 3) && !pairResult.ReadPair.RealignedR2;
            var readTreatment = ReadTreatment(silencedR1, silencedR2, pairResult);

            progressTracker.AddOrUpdate(
                classificationString + ":" + readTreatment,
                1,
                (key, currentCount) => currentCount + 1);

            // Materialize once so the (possibly lazy) sequence is enumerated a single time.
            var alignmentsList = alignments.ToList();
            foreach (var bamAlignment in alignmentsList)
            {
                if (_geminiOptions.LightDebug)
                {
                    AddMdTagCountsTags(bamAlignment, pairResult);
                }

                bamAlignment.ReplaceOrAddStringTag("XT", readTreatment);
                bamAlignment.ReplaceOrAddStringTag("XP", classificationString);
            }

            allAlignments.AddRange(alignmentsList);
        }
    }

    if (realignHandler != null)
    {
        realignHandler.Finish();
    }

    // The input list is emptied once its pairs have been turned into alignments.
    pairResults.Clear();
    return allAlignments;
}