/// <summary>
/// Regression test using a concrete softclipped read pair on chr1: after restitching, the pair
/// is expected to collapse into a single read whose cigar is 22S78M22S.
/// </summary>
public void TryReStitch_RealCases()
{
    var read1 = TestHelpers.CreateRead("chr1",
        "AGCAGCAGCAGCTCCAGCACCAGCAGTCCCAGCACCAGCAGGCCCCGAAGAAGCATACCCAGCAGCAGAAGACACCTCAGCAGCTGCACCAGGTGATCGG",
        14106298, new CigarAlignment("41M59S"));
    var read2 = TestHelpers.CreateRead("chr1",
        "GCGATCTATCAGTATTAGCTCCAGCATCAGCAGCCCGAGCATCTGCAGTTCTAGCAGCAGCAGTCCCAGCAGCAGCAGTCCCAGCAGCAGCTGCCCCAGT",
        14106328, new CigarAlignment("52S48M"));

    var stitcher = new BasicStitcher(20, false, true, debug: false, nifyUnstitchablePairs: true,
        ignoreProbeSoftclips: true, maxReadLength: 1024, ignoreReadsAboveMaxLength: false,
        thresholdNumDisagreeingBases: 1000);
    var stitchedPairHandler = new PairHandler(new Dictionary<int, string>() { { 1, "chr1" } }, stitcher, tryStitch: true);
    var restitcher = new PostRealignmentStitcher(stitchedPairHandler, new Mock<IStatusHandler>().Object);

    var pair = new ReadPair(read1.BamAlignment);
    pair.AddAlignment(read2.BamAlignment);

    var mockNmCalculator = new Mock<INmCalculator>();
    var reads = restitcher.GetRestitchedReads(pair, pair.Read1, pair.Read2, 1, 1, false, mockNmCalculator.Object, false);

    // A count is an integer; compare it against an integer literal rather than 1.0.
    Assert.Equal(1, reads.Count);
    Assert.Equal("22S78M22S", reads.First().CigarData.ToString());
}
/// <summary>
/// Evaluates <c>ReadsDoNotOverlap</c> for the two reads of the pair, stores the answer on the
/// pair's <c>DontOverlap</c> property, and hands the same answer back to the caller.
/// </summary>
/// <param name="readPair">Pair whose Read1/Read2 are checked and whose DontOverlap is updated.</param>
/// <returns>The value returned by <c>ReadsDoNotOverlap</c> for this pair.</returns>
private bool CheckAndSetReadsDoNotOverlap(ReadPair readPair)
{
    var readsAreDisjoint = ReadsDoNotOverlap(readPair.Read1, readPair.Read2);

    // Cache the result on the pair so later stages can read it without recomputing.
    readPair.DontOverlap = readsAreDisjoint;

    return readsAreDisjoint;
}
/// <summary>
/// Classifies an indel-containing pair that is not going to be stitched here and forwards it to
/// <c>HandlePairContainingIndels</c>.
/// </summary>
/// <remarks>
/// When Read1's end position reaches Read2's start position, the pair is run through
/// <c>OverlappingIndelHelpers.IndelsDisagreeWithStrongMate</c>; a reported disagreement yields
/// <c>PairClassification.Disagree</c>. In every other case the pair is <c>UnstitchIndel</c>.
/// </remarks>
/// <param name="readPair">The pair being classified.</param>
/// <param name="numMismatchesInR1">Mismatch count for read 1, passed through unchanged.</param>
/// <param name="numMismatchesInR2">Mismatch count for read 2, passed through unchanged.</param>
/// <param name="r1HasIndels">Whether read 1 contains indels.</param>
/// <param name="r2HasIndels">Whether read 2 contains indels.</param>
/// <returns>The result produced by <c>HandlePairContainingIndels</c>.</returns>
private PairResult HandleIndelPairIfStitchUnallowed(ReadPair readPair, int numMismatchesInR1, int numMismatchesInR2, bool r1HasIndels, bool r2HasIndels)
{
    // Defaults cover both the non-overlapping case and the "overlapping but agreeing" case.
    IEnumerable<BamAlignment> reconciledAlignments = null;
    var classification = PairClassification.UnstitchIndel;

    var r1 = readPair.Read1;
    var r2 = readPair.Read2;

    if (r1.EndPosition >= r2.Position)
    {
        reconciledAlignments = OverlappingIndelHelpers.IndelsDisagreeWithStrongMate(
            r1, r2, out bool indelsDisagree, 3, false);

        // TODO allow to stitch if they don't disagree, as they may not necessarily get the chance later (either user is not using realigner, or there are no indels strong enough to realign against)
        // Alternatively, if there are no indels to realign against, still stitch stuff if we can! (handle this in the realigner)
        // For the cases where we want to skip realignment, either tell it to stitch here (configurable), or have it still go through realigner but not realign?
        if (indelsDisagree)
        {
            classification = PairClassification.Disagree;
        }
    }

    var pairHasIndels = r1HasIndels || r2HasIndels;
    return HandlePairContainingIndels(readPair, r1HasIndels, r2HasIndels, numMismatchesInR1,
        numMismatchesInR2, pairHasIndels, classification, false, reconciledAlignments);
}
/// <summary>
/// Routes a read pair either straight to <paramref name="lineBuffer"/> or into
/// <paramref name="borderlinePairs"/> to wait for a same-named entry.
/// </summary>
/// <remarks>
/// A pair is held back when at least one of its reads satisfies <c>ReadIsNearby</c>, its
/// <c>DontOverlap</c> flag is not explicitly false, and it is not improper. The first such entry
/// for a name is parked in the dictionary. When a second entry with the same name arrives, its
/// alignments are merged into the parked one, which is then removed, marked
/// <c>PairStatus.Paired</c> and posted.
/// </remarks>
/// <param name="rp">The pair (or lone read wrapped as a pair) being flushed.</param>
/// <param name="borderlinePairs">Entries waiting for their mate, keyed by read name.</param>
/// <param name="numReadsFlushedAsSingles">Running counter; incremented each time a parked entry is completed.</param>
/// <param name="lineBuffer">Block that receives everything that is ready to move on.</param>
/// <returns>The updated value of <paramref name="numReadsFlushedAsSingles"/>.</returns>
private static int NumReadsFlushedAsSingles(ReadPair rp, ConcurrentDictionary<string, ReadPair> borderlinePairs, int numReadsFlushedAsSingles, BatchBlock<ReadPair> lineBuffer)
{
    // "DontOverlap != false" is true for both null (unknown) and true, same as !(HasValue && !Value).
    var holdForMate = (ReadIsNearby(rp.Read1) || ReadIsNearby(rp.Read2))
                      && rp.DontOverlap != false
                      && !rp.IsImproper;

    if (!holdForMate)
    {
        lineBuffer.Post(rp);
        return numReadsFlushedAsSingles;
    }

    // Single lookup instead of ContainsKey followed by repeated indexer reads.
    if (!borderlinePairs.TryGetValue(rp.Name, out var waitingPair))
    {
        borderlinePairs[rp.Name] = rp;
        return numReadsFlushedAsSingles;
    }

    numReadsFlushedAsSingles++;

    // Wait til we find the mate, then post to the next block
    // TODO should we actually post the earlier one to both blocks and let it carry itself over? Unfortunately by the time we get to edge state we've already done pair resolution, so starting with this because it's simpler and probably rare.
    // TODO document as limitation
    foreach (var aln in rp.GetAlignments())
    {
        waitingPair.AddAlignment(aln);
    }

    borderlinePairs.Remove(rp.Name, out var pairToPost);
    pairToPost.PairStatus = PairStatus.Paired;
    lineBuffer.Post(pairToPost);

    return numReadsFlushedAsSingles;
}
/// <summary>
/// Shared assertion helper: classifies <paramref name="readpair"/> with a
/// <c>ReadPairClassifierAndExtractor</c> backed by a mocked pair handler, then checks the
/// classification, how many times the handler's <c>ExtractReads</c> was invoked, and the number of
/// alignments returned.
/// </summary>
/// <param name="readpair">Pair under test.</param>
/// <param name="expectedClassification">Classification the extractor must report.</param>
/// <param name="expectedNumReads">Number of alignments the extractor must return.</param>
/// <param name="trustSoftclips">Forwarded to the extractor constructor.</param>
/// <param name="deferStitchIndelReads">Not read by this helper.</param>
/// <param name="shouldTryStitch">If true, <c>ExtractReads</c> must be called exactly once; otherwise never.</param>
/// <param name="stageStitchSucceed">If true the mock returns only Read1; otherwise it returns Read1 and Read2.</param>
/// <param name="treatAbnormalOrientationAsImproper">Forwarded to the extractor constructor.</param>
/// <param name="messyMapq">Forwarded to the extractor constructor.</param>
/// <param name="checkMd">Forwarded to the extractor constructor.</param>
private void VerifyClassificationAndExtraction(ReadPair readpair, PairClassification expectedClassification, int expectedNumReads, bool trustSoftclips, bool deferStitchIndelReads = true, bool shouldTryStitch = true, bool stageStitchSucceed = true, bool treatAbnormalOrientationAsImproper = false, int messyMapq = 30, bool checkMd = false)
{
    // What the fake handler hands back: one read for a "successful stitch", both reads otherwise.
    var stagedReads = stageStitchSucceed
        ? new List<BamAlignment>() { readpair.Read1 }
        : new List<BamAlignment>() { readpair.Read1, readpair.Read2 };

    var handlerMock = new Mock<IReadPairHandler>();
    handlerMock.Setup(h => h.ExtractReads(It.IsAny<ReadPair>())).Returns(stagedReads);

    var classifier = new ReadPairClassifierAndExtractor(trustSoftclips,
        messyMapq: messyMapq,
        treatAbnormalOrientationAsImproper: treatAbnormalOrientationAsImproper,
        checkMd: checkMd);

    var outcome = classifier.GetBamAlignmentsAndClassification(readpair, handlerMock.Object);
    var returnedAlignments = outcome.Alignments;

    Assert.Equal(expectedClassification, outcome.Classification);

    var expectedExtractCalls = shouldTryStitch ? 1 : 0;
    handlerMock.Verify(h => h.ExtractReads(It.IsAny<ReadPair>()), Times.Exactly(expectedExtractCalls));

    Assert.Equal(expectedNumReads, returnedAlignments.ToList().Count);
}
/// <summary>
/// Produces the alignments to emit for a read pair: a single stitched alignment when stitching is
/// enabled and succeeds, otherwise copies of both original reads (unless unstitchable pairs are
/// being filtered, in which case nothing is returned and the status counter is incremented).
/// </summary>
/// <param name="pair">Pair whose Read1 and Read2 are to be stitched or passed through.</param>
/// <returns>Zero, one, or two alignments as described above.</returns>
/// <exception cref="InvalidDataException">
/// Stitching reported success but left more than one read in <c>ReadsForProcessing</c>.
/// </exception>
public List<BamAlignment> ExtractReads(ReadPair pair)
{
    const char Forward = 'F';
    const char Reverse = 'R';

    var output = new List<BamAlignment>();

    var alignmentSet = new AlignmentSet(
        new Read(_refIdMapping[pair.Read1.RefID], pair.Read1),
        new Read(_refIdMapping[pair.Read2.RefID], pair.Read2),
        false);

    // Directions are reported in mate order, so swap them when the pair's Read1 slot holds the second mate.
    var slot1Dir = pair.Read1.IsReverseStrand() ? Reverse : Forward;
    var slot2Dir = pair.Read2.IsReverseStrand() ? Reverse : Forward;
    var slotsAreSwapped = pair.Read1.IsSecondMate();
    var read1dir = slotsAreSwapped ? slot2Dir : slot1Dir;
    var read2dir = slotsAreSwapped ? slot1Dir : slot2Dir;

    var stitched = _tryStitch && _stitcher.TryStitch(alignmentSet);

    if (!stitched)
    {
        if (_filterUnstitchablePairs)
        {
            _statusCounter.AddStatusCount("Unstitchable Pairs Filtered");
            return output;
        }

        // Status counting for "Unstitchable Pairs Kept" is disabled in the original code.
        output.Add(new BamAlignment(alignmentSet.PartnerRead1.BamAlignment));
        output.Add(new BamAlignment(alignmentSet.PartnerRead2.BamAlignment));
        return output;
    }

    // Status counting for "Stitched" is disabled in the original code.
    if (alignmentSet.ReadsForProcessing.Count > 1)
    {
        throw new InvalidDataException("AlignmentSets for stitched reads should only have one ReadsForProcessing.");
    }

    foreach (var stitchedRead in alignmentSet.ReadsForProcessing)
    {
        output.Add(StitcherHelpers.StitchifyBamAlignment(pair, stitchedRead, read1dir, read2dir));
    }

    return output;
}
/// <summary>
/// Runs <c>Execute</c> over four complete pairs plus two pairs delivered as separate single-mate
/// entries, and checks how many alignments come out per read name: one for each complete pair,
/// one for "LonerPair1", two for "LonerPairFarApart", seven in total.
/// </summary>
public void ProcessBam()
{
    const string indelCigar = "5M1I5M";

    var stitcherOptions = new StitcherOptions() { };
    var geminiOptions = new GeminiOptions() { RegionSize = 1000, };

    // Complete pairs, spread over several positions relative to the 1000-base region size.
    var completePairs = new List<ReadPair>()
    {
        TestHelpers.GetPair(indelCigar, indelCigar, name: "Pair1"),
        TestHelpers.GetPair(indelCigar, indelCigar, read1Position: 1001, name: "Pair2"),
        TestHelpers.GetPair(indelCigar, indelCigar, read1Position: 1201, name: "Pair3"),
        TestHelpers.GetPair(indelCigar, indelCigar, read1Position: 10000, name: "Pair4")
    };

    // "LonerPair1": mates at 999 and 1002, each arriving as its own ReadPair.
    var nearMate1 = TestHelpers.CreateBamAlignment("AAAAAAAATA", 999, 1001, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPair1");
    nearMate1.SetIsProperPair(true);
    var nearEntry1 = new ReadPair(nearMate1, "LonerPair1");

    var nearMate2 = TestHelpers.CreateBamAlignment("AAAAATAAAA", 1002, 999, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPair1", isFirstMate: false);
    nearMate2.SetIsProperPair(true);
    var nearEntry2 = new ReadPair(nearMate2, "LonerPair1", readNumber: ReadNumber.Read2);

    // "LonerPairFarApart": mates at 999 and 5001, each arriving as its own ReadPair.
    var farMate1 = TestHelpers.CreateBamAlignment("AAAAAAAAAA", 999, 5001, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPairFarApart");
    farMate1.SetIsProperPair(true);

    var farMate2 = TestHelpers.CreateBamAlignment("AAAAAAAAAA", 5001, 999, 30, true,
        cigar: new CigarAlignment("10M"), name: "LonerPairFarApart", isFirstMate: false);
    farMate2.SetIsProperPair(true);

    var farEntry1 = new ReadPair(farMate1, name: "LonerPairFarApart");
    var farEntry2 = new ReadPair(farMate2, name: "LonerPairFarApart", readNumber: ReadNumber.Read2);

    var lonerEntries = new List<ReadPair>() { nearEntry1, nearEntry2, farEntry1, farEntry2 };

    var written = new List<BamAlignment>();
    Execute(written, completePairs, geminiOptions, stitcherOptions, lonerEntries);

    foreach (var pairName in new[] { "Pair1", "Pair2", "Pair3", "Pair4" })
    {
        Assert.Equal(1, written.Count(x => x.Name == pairName));
    }

    Assert.Equal(1, written.Count(x => x.Name == "LonerPair1"));
    Assert.Equal(2, written.Count(x => x.Name == "LonerPairFarApart"));
    Assert.Equal(7, written.Count);

    written.Clear();
}
/// <summary>
/// Builds the output alignment for a stitched read: copies the read's alignment, clears the
/// first-mate and proper-pair flags, applies the stitched cigar when present, and adds the
/// XD, XU/XV/XW and XR tags.
/// </summary>
/// <param name="pair">Original pair; Read1's XU/XV/XW tags are carried over when present.</param>
/// <param name="read">Stitched read supplying the base alignment, stitched cigar and cigar directions.</param>
/// <param name="read1dir">Direction character for read 1, first character of the XR tag.</param>
/// <param name="read2dir">Direction character for read 2, second character of the XR tag.</param>
/// <returns>A new alignment; the one held by <paramref name="read"/> is not modified by this method.</returns>
public static BamAlignment StitchifyBamAlignment(ReadPair pair, Read read, char read1dir, char read2dir)
{
    var stitched = new BamAlignment(read.BamAlignment);
    stitched.SetIsFirstMate(false);
    stitched.SetIsProperPair(false);

    if (read.StitchedCigar != null)
    {
        stitched.CigarData = read.StitchedCigar;
    }

    var newTags = new TagUtils();

    if (read.CigarDirections != null)
    {
        newTags.AddStringTag("XD", read.CigarDirections.ToString());
    }

    // if the original reads had UMIs and were collapsed, they will have XU(Z), XV(i), XW(i)
    // these need to be copied to correctly populate some fields in the called variants
    var umiSource = pair.Read1;
    var sourceHasTags = umiSource.TagData != null && umiSource.TagData.Length > 0;
    if (sourceHasTags)
    {
        var xu = umiSource.GetStringTag("XU");
        if (xu != null)
        {
            newTags.AddStringTag("XU", xu);
        }

        var xv = umiSource.GetIntTag("XV");
        if (xv.HasValue)
        {
            newTags.AddIntTag("XV", xv.Value);
        }

        var xw = umiSource.GetIntTag("XW");
        if (xw.HasValue)
        {
            newTags.AddIntTag("XW", xw.Value);
        }
    }

    // XR is the two direction characters back to back, e.g. "FR".
    newTags.AddStringTag("XR", new string(new[] { read1dir, read2dir }));

    var newTagBytes = newTags.ToBytes();
    if (stitched.TagData == null)
    {
        stitched.TagData = newTagBytes;
    }
    else
    {
        stitched.AppendTagData(newTagBytes);
    }

    return stitched;
}
/// <summary>
/// Walks <c>ReadPair.IsComplete</c> through combinations of "primary alignment advertises a
/// supplementary" and "pair actually holds supplementary alignments", for both values of the
/// method's boolean argument.
/// </summary>
public void IsComplete()
{
    // Scenario 1: primaries advertise no supplementaries and the pair holds none.
    var plainMate1 = CreateAlignment();
    var pair = new ReadPair(plainMate1);
    Assert.False(pair.IsComplete(false));

    var plainMate2 = CreateAlignment();
    pair.AddAlignment(plainMate2, ReadNumber.Read2);
    Assert.True(pair.IsComplete(false));
    Assert.True(pair.IsComplete(true));

    // Scenario 2: a primary advertises a supplementary, but the pair does not hold it.
    pair = new ReadPair(plainMate1);
    var mateExpectingSupp = CreateAlignment(supplementary: "chr1,100,+,3M,50,1");
    pair.AddAlignment(mateExpectingSupp, ReadNumber.Read2);
    Assert.False(pair.IsComplete(true));
    Assert.True(pair.IsComplete(false));

    // Scenario 3: no supplementary advertised, yet the pair holds one.
    pair = new ReadPair(plainMate1);
    pair.AddAlignment(plainMate2, ReadNumber.Read2);
    var suppAlignment = CreateAlignment(isSupplementary: true);
    pair.AddAlignment(suppAlignment, ReadNumber.Read2);
    Assert.True(pair.IsComplete(true));
    Assert.True(pair.IsComplete(false));

    // Scenario 4: supplementary advertised and then supplied.
    pair = new ReadPair(plainMate1);
    pair.AddAlignment(mateExpectingSupp, ReadNumber.Read2);
    Assert.False(pair.IsComplete(true));
    pair.AddAlignment(suppAlignment, ReadNumber.Read2);
    Assert.True(pair.IsComplete(true));
    Assert.True(pair.IsComplete(false));

    // Scenario 5: both mates advertise a supplementary; only Read2's arrive at first.
    pair = new ReadPair(mateExpectingSupp);
    pair.AddAlignment(mateExpectingSupp, ReadNumber.Read2);
    Assert.False(pair.IsComplete(true));

    pair.AddAlignment(suppAlignment, ReadNumber.Read2);
    Assert.False(pair.IsComplete(true));
    Assert.True(pair.IsComplete(false));

    // A second supplementary on Read2 still leaves Read1's missing.
    pair.AddAlignment(suppAlignment, ReadNumber.Read2);
    Assert.False(pair.IsComplete(true));
    Assert.True(pair.IsComplete(false));

    // Supplying Read1's supplementary finally completes the pair.
    pair.AddAlignment(suppAlignment, ReadNumber.Read1);
    Assert.True(pair.IsComplete(true));
    Assert.True(pair.IsComplete(false));
}
/// <summary>
/// Assertion helper around <c>OverlappingIndelHelpers.IndelsDisagreeWithStrongMate</c>: checks the
/// reported disagreement flag and the cigar strings of the two alignments it returns.
/// </summary>
/// <param name="readPair">Pair whose Read1 and Read2 are passed to the helper.</param>
/// <param name="shouldDisagree">Expected value of the helper's <c>disagree</c> out parameter.</param>
/// <param name="expectedCigarR1">Expected cigar string of the first returned alignment.</param>
/// <param name="expectedCigarR2">Expected cigar string of the second returned alignment.</param>
/// <param name="softclipWeakOne">Forwarded to the helper's parameter of the same name.</param>
private void CheckReadsDisagreeTest(ReadPair readPair, bool shouldDisagree, string expectedCigarR1, string expectedCigarR2, bool softclipWeakOne = false)
{
    bool reportedDisagreement;
    var returnedReads = OverlappingIndelHelpers.IndelsDisagreeWithStrongMate(
        readPair.Read1,
        readPair.Read2,
        out reportedDisagreement,
        1,
        softclipWeakOne: softclipWeakOne);

    Assert.Equal(shouldDisagree, reportedDisagreement);

    var cigarR1 = returnedReads[0].CigarData.ToString();
    Assert.Equal(expectedCigarR1, cigarR1);

    var cigarR2 = returnedReads[1].CigarData.ToString();
    Assert.Equal(expectedCigarR2, cigarR2);
}
/// <summary>
/// Creates a result record for a processed read pair by storing each argument in the
/// like-named member.
/// </summary>
/// <param name="alignments">Alignments to emit for the pair.</param>
/// <param name="readPair">The pair the result belongs to.</param>
/// <param name="classification">Classification assigned to the pair; defaults to Unknown.</param>
/// <param name="hasIndels">Stored in <c>HasIndels</c>.</param>
/// <param name="isSplit">Stored in <c>IsSplit</c>.</param>
/// <param name="numMismatchesInSingleton">Stored in <c>NumMismatchesInSingleton</c>.</param>
/// <param name="softclipLengthForIndelRead">Stored in <c>SoftclipLengthForIndelRead</c>.</param>
public PairResult(IEnumerable<BamAlignment> alignments, ReadPair readPair, PairClassification classification = PairClassification.Unknown, bool hasIndels = false, bool isSplit = false, int numMismatchesInSingleton = 0, int softclipLengthForIndelRead = 0)
{
    // Assigned in parameter order.
    Alignments = alignments;
    ReadPair = readPair;
    Classification = classification;
    HasIndels = hasIndels;
    IsSplit = isSplit;
    NumMismatchesInSingleton = numMismatchesInSingleton;
    SoftclipLengthForIndelRead = softclipLengthForIndelRead;
}
/// <summary>
/// Test fixture helper: builds a two-read pair in which both reads carry the bases "ABC" and the
/// same proper-pair setting, differing only in position and cigar.
/// </summary>
/// <param name="isProperPair">Proper-pair setting passed to both alignments.</param>
/// <param name="read1Pos">Position for read 1.</param>
/// <param name="read1Cigar">Cigar string for read 1.</param>
/// <param name="read2Pos">Position for read 2.</param>
/// <param name="read2Cigar">Cigar string for read 2.</param>
/// <returns>A pair with the first alignment as Read1 and the second added as Read2.</returns>
private ReadPair CreateReadPair(bool isProperPair, int read1Pos, string read1Cigar, int read2Pos, string read2Cigar)
{
    const string bases = "ABC";

    var firstMate = StitcherPairFilterTests.CreateAlignment(bases, isProperPair, read1Pos, read1Cigar);
    var secondMate = StitcherPairFilterTests.CreateAlignment(bases, isProperPair, read2Pos, read2Cigar);

    var pair = new ReadPair(firstMate, readNumber: ReadNumber.Read1);
    pair.AddAlignment(secondMate, ReadNumber.Read2);

    return pair;
}
/// <summary>
/// Runs <c>OverlappingIndelHelpers.IndelsDisagreeWithStrongMate</c> on the pair and returns its
/// alignments only when a disagreement was reported.
/// </summary>
/// <param name="pair">Pair whose Read1 and Read2 are compared.</param>
/// <returns>The helper's alignment list if the reads disagree; otherwise null.</returns>
private List<BamAlignment> ShouldRestitch(ReadPair pair)
{
    bool indelsDisagree;
    var adjustedReads = OverlappingIndelHelpers.IndelsDisagreeWithStrongMate(pair.Read1, pair.Read2, out indelsDisagree);

    return indelsDisagree ? adjustedReads : null;
}
/// <summary>
/// Assigns a classification to a pair that is being handled as incomplete and wraps it in a
/// <c>PairResult</c>. Pairs with indels are delegated to <c>HandlePairContainingIndels</c>.
/// </summary>
/// <remarks>
/// Decision order (first match wins):
/// single primary read: IndelImproper / IndelSingleton / Improper / UnstitchableAsSingleton;
/// otherwise with indels: IndelImproper if the pair itself is flagged improper, else UnstitchIndel;
/// otherwise improper (including abnormal orientation when configured): Improper;
/// otherwise LongFragment when the pair status says so; otherwise Unstitchable.
/// </remarks>
/// <param name="readPair">Pair to classify.</param>
/// <param name="r1HasIndels">Whether read 1 contains indels.</param>
/// <param name="r2HasIndels">Whether read 2 contains indels.</param>
/// <param name="r1Nm">Mismatch count for read 1.</param>
/// <param name="r2Nm">Mismatch count for read 2.</param>
/// <returns>The classified result for the pair.</returns>
private PairResult ClassifyIncompletePair(ReadPair readPair, bool r1HasIndels, bool r2HasIndels, int r1Nm, int r2Nm)
{
    var hasIndels = r1HasIndels || r2HasIndels;
    var isImproper = readPair.IsImproper ||
                     (_treatAbnormalOrientationAsImproper && !readPair.NormalPairOrientation);

    PairClassification classification;
    if (readPair.NumPrimaryReads == 1)
    {
        if (hasIndels)
        {
            classification = isImproper ? PairClassification.IndelImproper : PairClassification.IndelSingleton;
        }
        else
        {
            classification = isImproper ? PairClassification.Improper : PairClassification.UnstitchableAsSingleton;
        }
    }
    else if (hasIndels)
    {
        // Only the pair's own improper flag is consulted here, not the orientation-based one.
        // (A fragment-span check, MaxPosition - MinPosition > 1000, is disabled in the original.)
        classification = readPair.IsImproper
            ? PairClassification.IndelImproper
            : PairClassification.UnstitchIndel;
    }
    else if (isImproper)
    {
        classification = PairClassification.Improper;
    }
    else if (readPair.PairStatus == PairStatus.LongFragment)
    {
        classification = PairClassification.LongFragment;
    }
    else
    {
        classification = PairClassification.Unstitchable;
    }

    if (hasIndels)
    {
        return HandlePairContainingIndels(readPair, r1HasIndels, r2HasIndels, r1Nm, r2Nm, hasIndels, classification, false);
    }

    var result = new PairResult(
        alignments: readPair.GetAlignments(),
        readPair: readPair,
        classification: classification,
        hasIndels: false,
        isSplit: false,
        numMismatchesInSingleton: Math.Max(r1Nm, r2Nm),
        softclipLengthForIndelRead: 0)
    {
        R1Nm = r1Nm,
        R2Nm = r2Nm
    };
    return result;
}
/// <summary>
/// Classifies a split pair without indel handling and packages its alignments into a
/// <c>PairResult</c>.
/// </summary>
/// <remarks>
/// If <paramref name="bamAlignmentList"/> is null or empty, the pair's own alignments are used.
/// With no alignments at all the pair is Unusable. Otherwise it starts as Split (or Unstitchable
/// when the pair status is SplitQuality) and may be downgraded: a lone alignment below the minimum
/// map quality, or a maximum NM above <c>NumMismatchesToBeConsideredUnusableIfSplit</c>, gives
/// UnusableSplit; indels or a maximum NM above the messy threshold give MessySplit.
/// The incoming sequence is enumerated exactly once.
/// </remarks>
/// <param name="readPair">The pair being classified.</param>
/// <param name="bamAlignmentList">Alignments to use, or null/empty to fall back to the pair's alignments.</param>
/// <param name="hasIndels">Whether the pair has indels; also forwarded to the result.</param>
/// <param name="isSplit">Forwarded to the result.</param>
/// <returns>A result holding a private list of the alignments and the chosen classification.</returns>
private PairResult HandleSplitNonIndelPair(ReadPair readPair, IEnumerable<BamAlignment> bamAlignmentList, bool hasIndels, bool isSplit)
{
    // Materialise once: the original enumerated the IEnumerable for Any/Count/First/Select/ToList.
    var alignments = bamAlignmentList?.ToList();
    if (alignments == null || alignments.Count == 0)
    {
        alignments = readPair.GetAlignments().ToList();
    }

    var numMismatchesInSingleton = 0;
    PairClassification classification;

    if (alignments.Count == 0)
    {
        classification = PairClassification.Unusable;
    }
    else
    {
        classification = readPair.PairStatus == PairStatus.SplitQuality
            ? PairClassification.Unstitchable
            : PairClassification.Split;

        if (alignments.Count == 1 && alignments[0].MapQuality < _minMapQuality)
        {
            classification = PairClassification.UnusableSplit;
        }
        else
        {
            // A missing NM tag counts as zero mismatches.
            numMismatchesInSingleton = alignments.Max(b => b.GetIntTag("NM") ?? 0);

            if (numMismatchesInSingleton > NumMismatchesToBeConsideredUnusableIfSplit)
            {
                // TODO perhaps make these unmapped? Or just adjust the mapq? Or eventually softclip? It's just inconvenient to do the softclipping here because we don't have the reference sequence. Perhaps we could just softclip the last N bases of the read though?
                // Making them unusable and skipping them ended up hurting recall, as one might imagine. Not by a ton, and you do see precision go up by a similar amount, but need to think about this.
                // TODO look into what TPs were lost between commit a8b and ce3.
                classification = PairClassification.UnusableSplit;
            }
            else if (hasIndels || numMismatchesInSingleton > _numMismatchesToBeConsideredMessy)
            {
                classification = PairClassification.MessySplit;
            }
        }
    }

    return new PairResult(
        alignments: alignments,
        readPair: readPair,
        classification: classification,
        hasIndels: hasIndels,
        isSplit: isSplit,
        numMismatchesInSingleton: numMismatchesInSingleton,
        softclipLengthForIndelRead: 0);
}
/// <summary>
/// Checks that <c>Read2Alignments</c> grows only when an alignment is added under
/// <c>ReadNumber.Read2</c>, and is unaffected by an alignment added under <c>ReadNumber.Read1</c>.
/// </summary>
public void GetRead2Alignments()
{
    // A pair seeded with a read-2 alignment starts with exactly one entry.
    var primaryR2 = CreateAlignment();
    var pair = new ReadPair(primaryR2, readNumber: ReadNumber.Read2);
    Assert.Equal(1, pair.Read2Alignments.Count);

    // A secondary alignment added for read 2 is counted as well.
    var secondaryR2 = CreateAlignment();
    secondaryR2.SetIsSecondaryAlignment(true);
    pair.AddAlignment(secondaryR2, ReadNumber.Read2);
    Assert.Equal(2, pair.Read2Alignments.Count);

    // An alignment added for read 1 must not change the read-2 count.
    var extraR1 = CreateAlignment();
    // NOTE(review): the next call only queries the supplementary flag and discards the answer;
    // it does not mark the alignment as supplementary. Presumably a setter was intended - confirm.
    extraR1.IsSupplementaryAlignment();
    pair.AddAlignment(extraR1, ReadNumber.Read1);
    Assert.Equal(2, pair.Read2Alignments.Count);
}
/// <summary>
/// Checks that <c>ReadPair.GetAlignments</c> returns every alignment added to the pair: four when
/// two primaries and two supplementaries were added, two when only the primaries were added.
/// </summary>
public void GetAlignments()
{
    var primary = CreateAlignment();
    var primaryExpectingSupp = CreateAlignment(supplementary: "chr1,100,+,3M,50,1");
    var supp = CreateAlignment(isSupplementary: true);

    // Two primaries plus one supplementary per read number.
    var fullPair = new ReadPair(primary);
    fullPair.AddAlignment(primaryExpectingSupp, ReadNumber.Read2);
    fullPair.AddAlignment(supp, ReadNumber.Read2);
    fullPair.AddAlignment(supp, ReadNumber.Read1);
    var fullCount = fullPair.GetAlignments().Count();
    Assert.Equal(4, fullCount);

    // Primaries only.
    var primariesOnly = new ReadPair(primary);
    primariesOnly.AddAlignment(primaryExpectingSupp, ReadNumber.Read2);
    var primariesCount = primariesOnly.GetAlignments().Count();
    Assert.Equal(2, primariesCount);
}
/// <summary>
/// Downgrades a messy unstitchable classification to its "suspicious read" variant when either
/// read's map quality is below <c>_messyMapq</c>. All other classifications pass through unchanged.
/// </summary>
/// <param name="readPair">Pair whose reads' map qualities are inspected.</param>
/// <param name="classification">Classification assigned so far.</param>
/// <returns>
/// <c>UnstitchMessyIndelSuspiciousRead</c> or <c>UnstitchMessySuspiciousRead</c> when the input was
/// <c>UnstitchMessyIndel</c> / <c>UnstitchMessy</c> and a read falls below the threshold;
/// otherwise <paramref name="classification"/> as given.
/// </returns>
/// <exception cref="Exception">A messy pair is missing Read1 or Read2.</exception>
private PairClassification AdjustClassificationForMultimapper(ReadPair readPair, PairClassification classification)
{
    var isMessy = classification == PairClassification.UnstitchMessy ||
                  classification == PairClassification.UnstitchMessyIndel;
    if (!isMessy)
    {
        return classification;
    }

    if (readPair.Read1 == null)
    {
        throw new Exception("Null read 1");
    }

    if (readPair.Read2 == null)
    {
        // Previously reported "Null read 1" here, which pointed at the wrong read.
        throw new Exception("Null read 2");
    }

    var hasLowMapqRead = readPair.Read1.MapQuality < _messyMapq ||
                         readPair.Read2.MapQuality < _messyMapq;
    if (!hasLowMapqRead)
    {
        return classification;
    }

    var hasIndels = classification == PairClassification.UnstitchMessyIndel;
    return hasIndels
        ? PairClassification.UnstitchMessyIndelSuspiciousRead
        : PairClassification.UnstitchMessySuspiciousRead;
}
/// <summary>
/// Emits both reads of a pair that was not stitched and, for each read that was realigned,
/// refreshes the mate's mate-position, collects evidence, and recomputes the read's NM value.
/// </summary>
/// <param name="pair">Pair whose reads are emitted; its Nm1/Nm2 are updated for realigned reads.</param>
/// <param name="reads">Output list; Read1 then Read2 are appended.</param>
/// <param name="realignedR1">Whether read 1 was realigned.</param>
/// <param name="realignedR2">Whether read 2 was realigned.</param>
/// <param name="nmCalculator">Computes the NM value for a realigned read.</param>
private void HandleUnstitchedReads(ReadPair pair, List<BamAlignment> reads, bool realignedR1, bool realignedR2, INmCalculator nmCalculator)
{
    reads.Add(pair.Read1);
    reads.Add(pair.Read2);

    if (realignedR1)
    {
        // Read 1 may have moved, so read 2's mate position must follow it.
        pair.Read2.MatePosition = pair.Read1.Position;
        _evidenceCollector.CollectEvidence(pair.Read1, true, false, _chromosome);

        var nm = nmCalculator.GetNm(pair.Read1);
        pair.Nm1 = nm;
        AddNmTag(pair.Read1, nm);
    }

    if (realignedR2)
    {
        // Read 2 may have moved, so read 1's mate position must follow it.
        pair.Read1.MatePosition = pair.Read2.Position;
        _evidenceCollector.CollectEvidence(pair.Read2, true, false, _chromosome);

        // NM for read 2 must come from read 2 itself (the original computed it from read 1).
        var nm = nmCalculator.GetNm(pair.Read2);
        pair.Nm2 = nm;
        AddNmTag(pair.Read2, nm);
    }
}
/// <summary>
/// Verifies that an XR tag already present on the input reads does not survive extraction: the
/// single alignment returned by <c>PairHandler.ExtractReads</c> must report XR = "FR".
/// </summary>
private void ExtractReadsStrandXRValidation()
{
    var refIdMapping = new Dictionary<int, string>() { { 1, "chr1" } };
    var stitcher = StitcherTestHelpers.GetStitcher(10, false);
    var readStatusCounter = new ReadStatusCounter();
    var pairHandler = new PairHandler(refIdMapping, stitcher, readStatusCounter, filterUnstitchablePairs: true);

    var alignment1 = new BamAlignment()
    {
        AlignmentFlag = 99,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    var tagUtils = new TagUtils();
    tagUtils.AddStringTag("XR", "BLABLA"); //start with random XR tag and confirm whether it is overwritten by read direction
    alignment1.AppendTagData(tagUtils.ToBytes());

    var alignment2 = new BamAlignment()
    {
        AlignmentFlag = 144,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    // Use the second read's own TagUtils. Reusing the first instance (as the original did) appended
    // a second XR entry to it and left tagUtils2 unused.
    var tagUtils2 = new TagUtils();
    tagUtils2.AddStringTag("XR", "BLABLA"); //start with random XR tag and confirm whether it is overwritten by read direction
    alignment2.AppendTagData(tagUtils2.ToBytes());

    var readPair = new ReadPair(alignment1);
    readPair.AddAlignment(alignment2);

    var alignmentResults = pairHandler.ExtractReads(readPair);

    Assert.Equal(1, alignmentResults.Count);
    var alignment = alignmentResults[0];
    Assert.Equal("FR", alignment.GetStringTag("XR"));
}
/// <summary>
/// Parameterised check of the XR tag written by <c>PairHandler.ExtractReads</c>: builds a pair
/// whose mates have the given strand orientations and asserts the single returned alignment
/// carries the expected XR value.
/// </summary>
/// <param name="r1Reverse">Whether the first mate is on the reverse strand.</param>
/// <param name="r2Reverse">Whether the second mate is on the reverse strand.</param>
/// <param name="expectedXRTag">XR tag value expected on the extracted alignment.</param>
private void ExtractReadsStrand(bool r1Reverse, bool r2Reverse, string expectedXRTag)
{
    var refIdMapping = new Dictionary<int, string>() { { 1, "chr1" } };
    var stitcher = StitcherTestHelpers.GetStitcher(10, false);
    var readStatusCounter = new ReadStatusCounter();
    var pairHandler = new PairHandler(refIdMapping, stitcher, readStatusCounter, filterUnstitchablePairs: true);

    var alignment1 = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    alignment1.SetIsFirstMate(true);
    alignment1.SetIsReverseStrand(r1Reverse);

    var alignment2 = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    alignment2.SetIsSecondMate(true);
    alignment2.SetIsReverseStrand(r2Reverse);

    var readPair = new ReadPair(alignment1);
    readPair.AddAlignment(alignment2);

    var alignmentResults = pairHandler.ExtractReads(readPair);

    Assert.Equal(1, alignmentResults.Count);

    // Fetch the tag once and assert on it (the original stored it in an unused local named XD).
    var xrTag = alignmentResults[0].GetStringTag("XR");
    Assert.Equal(expectedXRTag, xrTag);
}
/// <summary>
/// Decides whether a pair should be handled as incomplete. This is the case when non-overlapping
/// pairs are configured to be incomplete and <c>ReadsDoNotOverlap</c> holds for the pair, or when
/// improper pairs are configured to be incomplete and the pair is improper.
/// </summary>
/// <param name="readPair">Pair to evaluate.</param>
/// <returns>True if either configured condition applies; otherwise false.</returns>
public override bool TreatReadPairAsIncomplete(ReadPair readPair)
{
    // The overlap check only runs when the corresponding option is switched on.
    if (_treatNonOverlappingAsIncomplete && ReadsDoNotOverlap(readPair.Read1, readPair.Read2))
    {
        return true;
    }

    if (_treatImproperPairAsIncomplete && readPair.IsImproper)
    {
        return true;
    }

    return false;
}
/// <summary>
/// Pair-level skip decision for a mated pair. This implementation never skips: every mated pair
/// is passed on to stitching.
/// </summary>
/// <param name="pair">The mated pair being considered; not inspected.</param>
/// <returns>Always false.</returns>
protected override bool ShouldSkipPair(ReadPair pair) => false;
/// <summary>
/// Placeholder implementation: reports no pair-specific indels and leaves both NM values untouched.
/// </summary>
/// <param name="readpair">Not used.</param>
/// <param name="r1Indels">Not used.</param>
/// <param name="r2Indels">Not used.</param>
/// <param name="r1Nm">Not modified.</param>
/// <param name="r2Nm">Not modified.</param>
/// <returns>Always null.</returns>
public List<PreIndel> GetPairSpecificIndels(ReadPair readpair, List<PreIndel> r1Indels, List<PreIndel> r2Indels, ref int? r1Nm, ref int? r2Nm) => null;
/// <summary>
/// Decides whether an assembled pair should be skipped. By the time this is called, neither read
/// was skipped on its own, so the decision rests on the characteristics of the two reads taken
/// together.
/// </summary>
/// <param name="pair">The collected read pair to evaluate.</param>
/// <returns>True if the pair should be skipped; otherwise false.</returns>
protected abstract bool ShouldSkipPair(ReadPair pair);
/// <summary>
/// Verifies which bins <c>BinEvidence.AddMessEvidence</c> increments for a series of scenarios
/// (messy only, indel + messy, forward/reverse/mapq mess, indel only, a different region, a mate on
/// another chromosome, a read straddling two bins, and a mate beyond the tracked region).
/// Every scenario is applied to four <c>BinEvidence</c> instances that differ only in their last two
/// constructor flags, and each instance is checked against the same expected hit dictionaries, except
/// that permanently-empty dictionaries are substituted for the directional and/or mapq expectations
/// of the instances constructed with the corresponding flag set to false.
/// </summary>
public void AddMessEvidence()
{
    var read1 = TestHelpers.CreateBamAlignment("ATCGATCG", 123405, 123505, 30, true);
    var read2 = TestHelpers.CreateBamAlignment("ATCGATCG", 123505, 123405, 30, true);
    var pair = new ReadPair(read1);
    pair.AddAlignment(read2);
    var pairResult = new PairResult(pair.GetAlignments(), pair);

    var numBins = 5000;

    // Expected non-zero bin counts, keyed by bin index.
    var messyHitNonZeroes = new Dictionary<int, int>();
    var indelHitNonZeroes = new Dictionary<int, int>();
    var forwardMessNonZeroes = new Dictionary<int, int>();
    var reverseMessNonZeroes = new Dictionary<int, int>();
    var mapqMessNonZeroes = new Dictionary<int, int>();
    var singleMismatchNonZeroes = new Dictionary<int, int>();
    var allHitsNonZeroes = new Dictionary<int, int>();

    // These stay empty for the whole test: they are the expectations for instances that do not track the category.
    var forwardMessNonZeroesNotUsed = new Dictionary<int, int>();
    var reverseMessNonZeroesNotUsed = new Dictionary<int, int>();
    var mapqMessNonZeroesNotUsed = new Dictionary<int, int>();

    var binEvidence = new BinEvidence(1, true, numBins, false, 500, 123000, true, true);
    var binEvidenceNoMapqMess = new BinEvidence(1, true, numBins, false, 500, 123000, true, false);
    var binEvidenceNoDirectional = new BinEvidence(1, true, numBins, false, 500, 123000, false, true);
    var binEvidenceNoDirectionalOrMapqMess = new BinEvidence(1, true, numBins, false, 500, 123000, false, false);

    // Adds the same evidence to each of the four instances and checks each one against the current
    // expectations. Expectations must be updated BEFORE calling this. The fourth flag is always false
    // in this test; its meaning is defined by BinEvidence.AddMessEvidence.
    System.Action<bool, PairResult, bool, bool, bool, bool, bool> addToAllAndCheck =
        (messy, result, indel, fourthFlag, forwardMess, reverseMess, mapqMess) =>
        {
            // All categories tracked
            binEvidence.AddMessEvidence(messy, result, indel, fourthFlag, forwardMess, reverseMess, mapqMess);
            CheckCorrectBinsIncremented(numBins, messyHitNonZeroes, binEvidence, indelHitNonZeroes,
                forwardMessNonZeroes, reverseMessNonZeroes, mapqMessNonZeroes, singleMismatchNonZeroes, allHitsNonZeroes);

            // Not using mapq mess
            binEvidenceNoMapqMess.AddMessEvidence(messy, result, indel, fourthFlag, forwardMess, reverseMess, mapqMess);
            CheckCorrectBinsIncremented(numBins, messyHitNonZeroes, binEvidenceNoMapqMess, indelHitNonZeroes,
                forwardMessNonZeroes, reverseMessNonZeroes, mapqMessNonZeroesNotUsed, singleMismatchNonZeroes, allHitsNonZeroes);

            // Not using directional
            binEvidenceNoDirectional.AddMessEvidence(messy, result, indel, fourthFlag, forwardMess, reverseMess, mapqMess);
            CheckCorrectBinsIncremented(numBins, messyHitNonZeroes, binEvidenceNoDirectional, indelHitNonZeroes,
                forwardMessNonZeroesNotUsed, reverseMessNonZeroesNotUsed, mapqMessNonZeroes, singleMismatchNonZeroes, allHitsNonZeroes);

            // Not using directional or mapq mess
            binEvidenceNoDirectionalOrMapqMess.AddMessEvidence(messy, result, indel, fourthFlag, forwardMess, reverseMess, mapqMess);
            CheckCorrectBinsIncremented(numBins, messyHitNonZeroes, binEvidenceNoDirectionalOrMapqMess, indelHitNonZeroes,
                forwardMessNonZeroesNotUsed, reverseMessNonZeroesNotUsed, mapqMessNonZeroesNotUsed, singleMismatchNonZeroes, allHitsNonZeroes);
        };

    // Should add one piece of evidence for each alignment
    // Read1 should be in bin 0, read2 in bin 1
    // First, only messy
    allHitsNonZeroes[0] = 1;
    allHitsNonZeroes[1] = 1;
    messyHitNonZeroes[0] = 1;
    messyHitNonZeroes[1] = 1;
    addToAllAndCheck(true, pairResult, false, false, false, false, false);

    // Add indel and mess evidence
    allHitsNonZeroes[0] = 2;
    allHitsNonZeroes[1] = 2;
    messyHitNonZeroes[0] = 2;
    messyHitNonZeroes[1] = 2;
    indelHitNonZeroes[0] = 1;
    indelHitNonZeroes[1] = 1;
    addToAllAndCheck(true, pairResult, true, false, false, false, false);

    // Add forward mess (must also be called as mess - TODO perhaps change this)
    allHitsNonZeroes[0] = 3;
    allHitsNonZeroes[1] = 3;
    messyHitNonZeroes[0] = 3;
    messyHitNonZeroes[1] = 3;
    forwardMessNonZeroes[0] = 1;
    forwardMessNonZeroes[1] = 1;
    addToAllAndCheck(true, pairResult, false, false, true, false, false);

    // Add reverse mess (must also be called as mess - TODO perhaps change this)
    allHitsNonZeroes[0] = 4;
    allHitsNonZeroes[1] = 4;
    messyHitNonZeroes[0] = 4;
    messyHitNonZeroes[1] = 4;
    reverseMessNonZeroes[0] = 1;
    reverseMessNonZeroes[1] = 1;
    addToAllAndCheck(true, pairResult, false, false, false, true, false);

    // Add mapq mess (must also be called as mess - TODO perhaps change this)
    allHitsNonZeroes[0] = 5;
    allHitsNonZeroes[1] = 5;
    messyHitNonZeroes[0] = 5;
    messyHitNonZeroes[1] = 5;
    mapqMessNonZeroes[0] = 1;
    mapqMessNonZeroes[1] = 1;
    addToAllAndCheck(true, pairResult, false, false, false, false, true);

    // Add indel only
    allHitsNonZeroes[0] = 6;
    allHitsNonZeroes[1] = 6;
    indelHitNonZeroes[0] = 2;
    indelHitNonZeroes[1] = 2;
    addToAllAndCheck(false, pairResult, true, false, false, false, false);

    // Add at different region
    var read1_pair2 = TestHelpers.CreateBamAlignment("ATCGATCG", 125005, 126005, 30, true);
    var read2_pair2 = TestHelpers.CreateBamAlignment("ATCGATCG", 126005, 125005, 30, true);
    var pair2 = new ReadPair(read1_pair2);
    pair2.AddAlignment(read2_pair2);
    var pairResult2 = new PairResult(pair2.GetAlignments(), pair2);

    allHitsNonZeroes[4] = 1;
    allHitsNonZeroes[6] = 1;
    indelHitNonZeroes[4] = 1;
    indelHitNonZeroes[6] = 1;
    addToAllAndCheck(false, pairResult2, true, false, false, false, false);

    // Test on diff chroms - mate on diff chrom shouldn't contribute
    var read1_pair3 = TestHelpers.CreateBamAlignment("ATCGATCG", 125005, 126005, 30, true);
    var read2_pair3 = TestHelpers.CreateBamAlignment("ATCGATCG", 126005, 125005, 30, true);
    read2_pair3.RefID = 2;
    read1_pair3.MateRefID = 2;
    var pairSplitChrom = new ReadPair(read1_pair3);
    pairSplitChrom.AddAlignment(read2_pair3);
    var pairResultSplitChrom = new PairResult(pairSplitChrom.GetAlignments(), pairSplitChrom);

    allHitsNonZeroes[4] = 2;
    indelHitNonZeroes[4] = 2;
    addToAllAndCheck(false, pairResultSplitChrom, true, false, false, false, false);

    // Read spans 2 bins (long read, or straddles two). Should increment in both.
    var read1_pair4 = TestHelpers.CreateBamAlignment("ATCGATCG", 125005, 126495, 30, true);
    var read2_pair4 = TestHelpers.CreateBamAlignment("ATCGATCG", 126495, 125005, 30, true);
    var pairRead2Spans2Bins = new ReadPair(read1_pair4);
    pairRead2Spans2Bins.AddAlignment(read2_pair4);
    var pairResultRead2Spans2Bins = new PairResult(pairRead2Spans2Bins.GetAlignments(), pairRead2Spans2Bins);

    allHitsNonZeroes[4] = 3;
    allHitsNonZeroes[6] = 2;
    allHitsNonZeroes[7] = 1;
    indelHitNonZeroes[4] = 3;
    indelHitNonZeroes[6] = 2;
    indelHitNonZeroes[7] = 1;
    addToAllAndCheck(false, pairResultRead2Spans2Bins, true, false, false, false, false);

    // Test mate goes outside of binEvidence region
    var read1_pair5 = TestHelpers.CreateBamAlignment("ATCGATCG", 125005, 100026495, 30, true);
    var read2_pair5 = TestHelpers.CreateBamAlignment("ATCGATCG", 100026495, 125005, 30, true);
    var pairRead2PastRegion = new ReadPair(read1_pair5);
    pairRead2PastRegion.AddAlignment(read2_pair5);
    var pairResultRead2PastRegion = new PairResult(pairRead2PastRegion.GetAlignments(), pairRead2PastRegion);

    allHitsNonZeroes[4] = 4;
    indelHitNonZeroes[4] = 4;
    addToAllAndCheck(false, pairResultRead2PastRegion, true, false, false, false, false);
}
/// <summary>
/// Drives <c>BamRewriter.Execute</c> over three mocked alignments ("pair", "pair2", "single").
/// The mocked pair filter turns any alignment whose name starts with "pair" into a ReadPair that
/// contains the same alignment twice, and records everything else as unpaired. The test checks how
/// many alignments reach the writer with the final constructor flag set to true (5) and false (4).
/// </summary>
public void ExecuteTest()
{
    var alignmentsToRead = new List<BamAlignment>()
    {
        CreateAlignment("pair"),
        CreateAlignment("pair2"),
        CreateAlignment("single")
    };
    var writtenAlignments = new List<BamAlignment>();
    var unpairedAlignments = new List<BamAlignment>();

    // Both the writer and the handles it hands out record into the same list.
    var bamWriter = new Mock<IBamWriterMultithreaded>();
    bamWriter.Setup(w => w.WriteAlignment(It.IsAny<BamAlignment>()))
        .Callback<BamAlignment>(written => writtenAlignments.Add(written));

    var bamWriterHandle = new Mock<IBamWriterHandle>();
    var bamWriterHandleList = new List<IBamWriterHandle> { bamWriterHandle.Object };
    bamWriterHandle.Setup(w => w.WriteAlignment(It.IsAny<BamAlignment>()))
        .Callback<BamAlignment>(written => writtenAlignments.Add(written));

    bamWriter.Setup(w => w.GenerateHandles()).Returns(() => bamWriterHandleList);
    bamWriter.Setup(w => w.Flush());

    var alignmentPairFilter = new Mock<IAlignmentPairFilter>();
    alignmentPairFilter.Setup(f => f.TryPair(It.IsAny<BamAlignment>()))
        .Returns<BamAlignment>(candidate =>
        {
            if (!candidate.Name.StartsWith("pair"))
            {
                unpairedAlignments.Add(candidate);
                return null;
            }

            // The same alignment is used for both slots of the pair.
            var mated = new ReadPair(candidate);
            mated.AddAlignment(candidate);
            return mated;
        });
    alignmentPairFilter.Setup(f => f.GetFlushableUnpairedReads()).Returns(unpairedAlignments);

    BlockingCollection<Task> taskQueue = new BlockingCollection<Task>();

    // Create a thread pool with 1 thread. It will execute any tasks
    // added to the taskQueue by the BamRewriter.
    ThreadPool threadPool = new ThreadPool(taskQueue, 1);

    var readPairHandler = new Mock<IReadPairHandler>();
    readPairHandler.Setup(h => h.ExtractReads(It.IsAny<ReadPair>()))
        .Returns<ReadPair>(p =>
        {
            var combined = new List<BamAlignment>(p.Read1Alignments);
            combined.AddRange(p.Read2Alignments);
            return combined;
        });

    // Each run gets a fresh reader, a fresh handler list and a fresh rewriter.
    System.Func<bool, BamRewriter> buildRewriter = getUnpaired => new BamRewriter(
        MockBamReader(alignmentsToRead),
        bamWriter.Object,
        alignmentPairFilter.Object,
        new List<IReadPairHandler> { readPairHandler.Object },
        taskQueue,
        getUnpaired);

    // Given a list of reads, should try to pair them all
    // Should flush to bam once the buffer reaches the specified size
    // If specifying getUnpaired = true, should also flush unpaired reads (as designated by the filter) to the bam
    buildRewriter(true).Execute();
    Assert.Equal(5, writtenAlignments.Count);

    // Should get all of the reads flushed, regardless of buffer size
    writtenAlignments.Clear();
    unpairedAlignments.Clear();
    buildRewriter(true).Execute();
    Assert.Equal(5, writtenAlignments.Count);

    writtenAlignments.Clear();
    unpairedAlignments.Clear();
    buildRewriter(true).Execute();
    Assert.Equal(5, writtenAlignments.Count);

    // If getUnpaired = false, should not flush unpaired reads to the bam
    writtenAlignments.Clear();
    unpairedAlignments.Clear();
    buildRewriter(false).Execute();
    Assert.Equal(4, writtenAlignments.Count);
    Assert.True(writtenAlignments.All(a => a.Name.StartsWith("pair")));
}
/// <summary>
/// Builds a test <c>ReadPair</c> on RefID 1 from the supplied CIGAR strings.
/// Read 1 is placed at <paramref name="read1Position"/>; unless <paramref name="singleReadOnly"/> is set,
/// a second, reverse-strand mate is added at <c>read1Position + read2Offset</c>. All base qualities are 30.
/// </summary>
/// <param name="cigar1">CIGAR string for read 1.</param>
/// <param name="cigar2">CIGAR string for read 2 (unused when <paramref name="singleReadOnly"/> is true).</param>
/// <param name="mapq1">Mapping quality of read 1.</param>
/// <param name="mapq2">Mapping quality of read 2.</param>
/// <param name="pairStatus">Value assigned to the pair's <c>PairStatus</c>.</param>
/// <param name="singleReadOnly">When true, only read 1 is created and no pairing flags are set.</param>
/// <param name="nm">NM tag value for read 1; a negative value means no NM tag is written.</param>
/// <param name="read2Offset">Offset of read 2 relative to read 1, used for both its position and its slice of <paramref name="basesRaw"/>.</param>
/// <param name="nm2">NM tag value for read 2; defaults to <paramref name="nm"/>. A negative value means no NM tag is written.</param>
/// <param name="name">Read name shared by both mates; defaults to "hi:1:2:3:4:5:6".</param>
/// <param name="basesRaw">Sequence the read bases are sliced from when explicit bases are not given.</param>
/// <param name="read1Position">Position of read 1.</param>
/// <param name="read1Bases">Explicit bases for read 1; overrides slicing from <paramref name="basesRaw"/>.</param>
/// <param name="read2Bases">Explicit bases for read 2; overrides slicing from <paramref name="basesRaw"/>.</param>
/// <returns>The constructed pair.</returns>
public static ReadPair GetPair(string cigar1, string cigar2, uint mapq1 = 30, uint mapq2 = 30, PairStatus pairStatus = PairStatus.Paired, bool singleReadOnly = false, int nm = 0, int read2Offset = 0, int? nm2 = null, string name = null, string basesRaw = "AAAGTTTTCCCCCCCCCCCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", int read1Position = 99, string read1Bases = null, string read2Bases = null)
{
    // Mates of one pair must carry the same read name, so resolve it once and use it for both reads.
    // (Previously a caller-supplied name was applied to read 1 only.)
    var readName = name ?? "hi:1:2:3:4:5:6";
    int nmRead2 = nm2 ?? nm;

    var tagUtils = new TagUtils();
    if (nm >= 0)
    {
        tagUtils.AddIntTag("NM", nm);
    }

    var cigarAln1 = new CigarAlignment(cigar1);

    var qualities1 = new List<byte>();
    for (int i = 0; i < cigarAln1.GetReadSpan(); i++)
    {
        qualities1.Add(30);
    }

    var alignment = new BamAlignment
    {
        Name = readName,
        RefID = 1,
        Position = read1Position,
        Bases = read1Bases ?? basesRaw.Substring(0, (int)cigarAln1.GetReadSpan()),
        CigarData = cigarAln1,
        Qualities = qualities1.ToArray(),
        MapQuality = mapq1
    };
    alignment.TagData = tagUtils.ToBytes();

    if (!singleReadOnly)
    {
        alignment.SetIsProperPair(true);
        alignment.MateRefID = 1;
    }

    var pair = new ReadPair(alignment);

    if (!singleReadOnly)
    {
        // Kept after the ReadPair construction, exactly as before, so the statement order is unchanged.
        alignment.SetIsMateUnmapped(false);

        var tagUtils2 = new TagUtils();
        if (nmRead2 >= 0)
        {
            tagUtils2.AddIntTag("NM", nmRead2);
        }

        var cigarAln2 = new CigarAlignment(cigar2);

        var qualities2 = new List<byte>();
        for (int i = 0; i < cigarAln2.GetReadSpan(); i++)
        {
            qualities2.Add(30);
        }

        var alignment2 = new BamAlignment
        {
            Name = readName,
            RefID = 1,
            Position = read1Position + read2Offset,
            Bases = read2Bases ?? basesRaw.Substring(read2Offset, (int)cigarAln2.GetReadSpan()),
            CigarData = cigarAln2,
            Qualities = qualities2.ToArray(),
            MapQuality = mapq2
        };
        alignment2.MateRefID = pair.Read1.RefID;
        alignment2.SetIsProperPair(true);
        alignment2.SetIsSecondMate(true);
        alignment2.SetIsReverseStrand(true);
        alignment2.TagData = tagUtils2.ToBytes();

        pair.AddAlignment(alignment2);
    }

    pair.PairStatus = pairStatus;
    return pair;
}
/// <summary>
/// Runs two identical alignments carrying different tag sets through <c>PairHandler.ExtractReads</c>
/// and checks the tags on the single resulting alignment: "XD" must be present, "NM", "BC" and "SM"
/// must be absent, and, when requested, the UMI tags "XU", "XV" and "XW" must be carried over.
/// </summary>
/// <param name="addUmiTags">Whether to add the XV/XW/XU tags to both inputs and assert on them.</param>
private void ExtractReads(bool addUmiTags)
{
    var pairHandler = new PairHandler(
        new Dictionary<int, string>() { { 1, "chr1" } },
        StitcherTestHelpers.GetStitcher(10, false),
        new ReadStatusCounter(),
        filterUnstitchablePairs: true);

    // Both mates start from the same field values; only their tags differ.
    System.Func<BamAlignment> newMate = () => new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };

    var firstMate = newMate();
    var firstMateTags = new TagUtils();
    firstMateTags.AddStringTag("BC", "14");
    firstMateTags.AddIntTag("SM", 40);
    if (addUmiTags)
    {
        firstMateTags.AddIntTag("XV", 1);
        firstMateTags.AddIntTag("XW", 2);
        firstMateTags.AddStringTag("XU", "ABBA-ZABBA");
    }
    firstMate.AppendTagData(firstMateTags.ToBytes());

    var secondMate = newMate();
    var secondMateTags = new TagUtils();
    secondMateTags.AddIntTag("NM", 5);
    secondMateTags.AddStringTag("BC", "14");
    secondMateTags.AddIntTag("SM", 20);
    if (addUmiTags)
    {
        secondMateTags.AddIntTag("XV", 1);
        secondMateTags.AddIntTag("XW", 2);
        secondMateTags.AddStringTag("XU", "ABBA-ZABBA");
    }
    secondMate.AppendTagData(secondMateTags.ToBytes());

    var readPair = new ReadPair(firstMate);
    readPair.AddAlignment(secondMate);

    var extracted = pairHandler.ExtractReads(readPair);
    Assert.Equal(1, extracted.Count);

    var result = extracted[0];
    Assert.NotNull(result.GetStringTag("XD"));
    Assert.Null(result.GetIntTag("NM"));
    Assert.Null(result.GetStringTag("BC"));
    Assert.Null(result.GetIntTag("SM"));

    if (!addUmiTags)
    {
        return;
    }

    Assert.Equal("ABBA-ZABBA", result.GetStringTag("XU"));
    Assert.Equal(1, result.GetIntTag("XV"));
    Assert.Equal(2, result.GetIntTag("XW"));
}
/// <summary>
/// Asks the pair handler to extract (stitch) the reads of a pair and classifies the outcome.
/// Exactly one resulting alignment counts as a successful stitch: the pair is flagged as stitched,
/// classified as <c>PerfectStitched</c>, and the string tags listed in <c>_tagsToKeepFromR1</c> are
/// copied from read 1 onto the stitched alignment. Any other count is classified as <c>FailStitch</c>.
/// </summary>
/// <param name="readPair">The pair to stitch; its <c>Stitched</c> flag is set on success.</param>
/// <param name="pairHandler">Handler that produces the output alignments for the pair.</param>
/// <param name="classification">Receives the resulting classification.</param>
/// <returns>The alignments produced by the handler, materialized as a list.</returns>
private IEnumerable<BamAlignment> TryStitch(ReadPair readPair, IReadPairHandler pairHandler, out PairClassification classification)
{
    // TODO if we end up allowing NM calculation in here, this will become true.
    const bool allowStitchingOnImperfectReads = false;

    // Materialize exactly once and use this list for everything below, including the return value.
    // Re-enumerating the handler's sequence could, for a lazily-produced sequence, yield different
    // objects than the ones the tags were copied onto.
    var stitchedAlignments = pairHandler.ExtractReads(readPair).ToList();

    if (stitchedAlignments.Count != 1)
    {
        classification = PairClassification.FailStitch;
        return stitchedAlignments;
    }

    var stitchedResult = stitchedAlignments[0];
    readPair.Stitched = true;
    classification = PairClassification.PerfectStitched;

    // Currently compiled out by the constant above; kept so the logic is ready once NM is available.
    if (allowStitchingOnImperfectReads)
    {
        int? nm = 0;

        //TODO handle this if it is a hit on performance. Making it simple for now because the previous logic where we were lazy evaluating was a bit skewed
        var containsImperfections = ReadContainsImperfections(stitchedResult, _trustSoftclips);
        //nm = stitchedResult.GetIntTag("NM"); // TODO reinstate this if stitched read has proper NM
        var numMismatchesInR1 = readPair.Read1.GetIntTag("NM");
        var numMismatchesInR2 = readPair.Read2.GetIntTag("NM");

        if (containsImperfections || (nm > 0 || numMismatchesInR1 > 0 || numMismatchesInR2 > 0))
        {
            classification = PairClassification.ImperfectStitched;

            if (numMismatchesInR1 <= NumMismatchesToBeConsideredLikelySnvInStitched &&
                numMismatchesInR2 <= NumMismatchesToBeConsideredLikelySnvInStitched &&
                !containsImperfections)
            {
                classification = PairClassification.SingleMismatchStitched;
            }
            else if (nm >= _numMismatchesToBeConsideredMessy ||
                     numMismatchesInR1 >= _numMismatchesToBeConsideredMessy ||
                     numMismatchesInR2 >= _numMismatchesToBeConsideredMessy)
            {
                classification = PairClassification.MessyStitched;
            }
        }
    }

    // Carry selected read-1 tags over to the stitched read; tags missing on read 1 are skipped.
    foreach (var tag in _tagsToKeepFromR1)
    {
        var r1Tag = readPair.Read1.GetStringTag(tag);
        if (r1Tag != null)
        {
            stitchedResult.ReplaceOrAddStringTag(tag, r1Tag);
        }
    }

    return stitchedAlignments;
}