public void KeepOriginalIfAlignmentUnchanged()
{
    // Scenario: two reads that already carry a 2-base insertion (2M2I3M) plus one 7M read with
    // the same bases at the same position.
    // Rules this test is written against:
    //   - realignment that yields the same alignment is not counted as realigned;
    //   - a different cigar at the same position is counted as realigned;
    //   - the same cigar at a different position is counted as realigned
    //     (no fixture in this test reproduces that situation yet).
    const string readBases = "ZATTAAZ";

    var indelReadA = new Read("chr", CreateAlignment("hasIndels", 1, "2M2I3M", readBases));
    var indelReadB = new Read("chr", CreateAlignment("hasIndels2", 1, "2M2I3M", readBases));
    var samePosNewCigar = new Read("chr", CreateAlignment("candidateForRealignmentGoesToSamePos", 1, "7M", readBases));

    // Each extractor receives its own list instance holding the same three reads.
    var allReads = new[] { indelReadA, indelReadB, samePosNewCigar };
    var realignSource = new MockExtractor(new List<Read>(allReads));
    var candidateSource = new MockExtractor(new List<Read>(allReads));

    // Reads that are expected to be re-aligned.
    var expectedRealigned = new List<string> { samePosNewCigar.Name };

    // Reads that are expected to be written but not re-aligned.
    var expectedUntouched = new List<string> { indelReadA.Name, indelReadB.Name };

    var mockWriter = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

    SetupExecute(
        realignSource,
        candidateSource,
        mockWriter,
        skipDups: true,
        maxShift: 50,
        chrReference: "YZAAAZZZZZZZ");
}
public void CandidateExtractFilter()
{
    // Candidate indels must not be taken from reads with a MapQ of zero or from secondary
    // alignments. Three reads carry a distinct 2-base insertion (GG, TT, CC); only the GG one
    // comes from a read that is neither MapQ 0 nor secondary, so only the 7M read with the GG
    // bases is expected to be re-aligned.
    var highMapQ = new Read("chr", CreateAlignment("highMapQ", 0, "2M2I3M", "AAGGAAA"));
    var lowMapQ = new Read("chr", CreateAlignment("lowMapQ", 0, "2M2I3M", "AATTAAA", mapq: 0));
    var secondaryRead = new Read("chr", CreateAlignment("secondaryRead", 0, "2M2I3M", "AACCAAA"));
    secondaryRead.BamAlignment.SetIsSecondaryAlignment(true);

    var testIndel1 = new Read("chr", CreateAlignment("testIndel1", 0, "7M", "AAGGAAA"));
    var testIndel2 = new Read("chr", CreateAlignment("testIndel2", 0, "7M", "AATTAAA"));

    // Previously this read was also named "testIndel2" (copy-paste slip), which made the two
    // reads indistinguishable in the writer's name-based expectations.
    var testIndel3 = new Read("chr", CreateAlignment("testIndel3", 0, "7M", "AACCAAA"));

    var extractorForRealign = new MockExtractor(new List<Read>
    {
        highMapQ, lowMapQ, secondaryRead, testIndel1, testIndel2, testIndel3
    });
    var extractorForCandidates = new MockExtractor(new List<Read>
    {
        highMapQ, lowMapQ, secondaryRead, testIndel1, testIndel2, testIndel3
    });

    var writer = new MockRealignmentWriter(
        new List<string>
        {
            // reads that are expected to be re-aligned
            testIndel1.Name
        },
        new List<string>
        {
            // reads that are expected to be written but not re-aligned
            highMapQ.Name,
            lowMapQ.Name,
            secondaryRead.Name,
            testIndel2.Name,
            testIndel3.Name
        });

    SetupExecute(extractorForRealign, extractorForCandidates, writer, true, 50,
        allowRescoringOrig0: true,
        chrReference: "AAAAAAAAAAAAAAAAAAAAAAAAAA");
}
public void SkipRemoveDuplicatesTrue_VariantBelowThreshold()
{
    // One unique read and one duplicate read carry the insertion.
    // With duplicates left out of the evidence counting, the expected frequency is
    // 1 (unique read with indel) / 3 (total unique reads) = 33.3333333%.
    const string insertionBases = "AATAAA";
    const string referenceBases = "AAAAAA";

    var indelRead = new Read("chr", CreateAlignment("hasIndels", 0, "2M1I3M", insertionBases));
    var uniqueRefRead = new Read("chr", CreateAlignment("uniqRef", 0, "6M", referenceBases));
    var uniqueCandidate = new Read("chr", CreateAlignment("uniqCandidate", 0, "6M", insertionBases));
    var duplicateIndelRead = new Read("chr", CreateAlignment("dupWithIndels", 0, "2M1I3M", insertionBases));
    var duplicateRefRead = new Read("chr", CreateAlignment("dubRef", 0, "6M", referenceBases));

    // Flag the two duplicates.
    duplicateIndelRead.BamAlignment.SetIsDuplicate(true);
    duplicateRefRead.BamAlignment.SetIsDuplicate(true);

    // Both extractors see the same five reads, each through its own list instance.
    var inputReads = new[] { uniqueRefRead, indelRead, uniqueCandidate, duplicateIndelRead, duplicateRefRead };
    var realignSource = new MockExtractor(new List<Read>(inputReads));
    var candidateSource = new MockExtractor(new List<Read>(inputReads));

    // No read is expected to be re-aligned.
    var expectedRealigned = new List<string>();

    // Reads that are expected to be written but not re-aligned (the duplicates are not listed).
    var expectedUntouched = new List<string>
    {
        uniqueCandidate.Name,
        indelRead.Name,
        uniqueRefRead.Name,
    };

    var belowThresholdWriter = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

    // With the cutoff at 0.34 (34%) the 33.33333% variant falls below it,
    // so no realignment should be triggered.
    SetupExecute(
        realignSource,
        candidateSource,
        belowThresholdWriter,
        skipDups: false,
        maxShift: 50,
        chrReference: "AAAAAAAAAAA",
        frequencyCutoff: 0.34f,
        skipAndRemove: true);
}
public void SkipDuplicatesFalse_VariantAboveThreshold()
{
    // One unique read and one duplicate read carry the insertion.
    // With duplicates included in the evidence counting, the expected frequency is
    // 2 (unique + duplicate reads with indel) / 5 (total reads) = 40%.
    const string insertionBases = "AATTAAA";
    const string referenceBases = "AAAAAAA";

    var indelRead = new Read("chr", CreateAlignment("hasIndels", 0, "2M2I3M", insertionBases));
    var uniqueRefRead = new Read("chr", CreateAlignment("uniqRef", 0, "7M", referenceBases));
    var uniqueCandidate = new Read("chr", CreateAlignment("uniqCandidate", 0, "7M", insertionBases));
    var duplicateIndelRead = new Read("chr", CreateAlignment("dupWithIndels", 0, "2M2I3M", insertionBases));
    var duplicateRefRead = new Read("chr", CreateAlignment("dubRef", 0, "7M", referenceBases));

    duplicateIndelRead.BamAlignment.SetIsDuplicate(true);
    duplicateRefRead.BamAlignment.SetIsDuplicate(true);

    // Both extractors see the same five reads, each through its own list instance.
    var inputReads = new[] { uniqueRefRead, indelRead, uniqueCandidate, duplicateIndelRead, duplicateRefRead };
    var realignSource = new MockExtractor(new List<Read>(inputReads));
    var candidateSource = new MockExtractor(new List<Read>(inputReads));

    // Only the unique 7M read carrying the insertion bases is expected to be re-aligned.
    var expectedRealigned = new List<string> { uniqueCandidate.Name };

    // Every other read, duplicates included, is expected to be written as-is.
    var expectedUntouched = new List<string>
    {
        indelRead.Name,
        duplicateIndelRead.Name,
        uniqueRefRead.Name,
        duplicateRefRead.Name,
    };

    var mockWriter = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

    // The 40% variant frequency is above the 0.39 (39%) cutoff,
    // so realignment of candidate reads should be triggered.
    SetupExecute(
        realignSource,
        candidateSource,
        mockWriter,
        skipDups: false,
        maxShift: 50,
        chrReference: "AAAAAAAAAAA",
        frequencyCutoff: 0.39f);
}
// Realigns a single 9M candidate read against a read carrying a 4-base insertion and asserts
// the map quality of the remapped candidate.
//   initialQuality      - map quality assigned to the candidate before realignment.
//   hasMismatches       - when true the candidate's last base is 'Z' instead of 'A'.
//   expectedQuality     - map quality asserted on the remapped candidate.
//   allowRescoringOrig0 - forwarded unchanged to SetupExecute.
private void RealignAndCheckQuality(uint initialQuality, bool hasMismatches, uint expectedQuality, bool allowRescoringOrig0 = true)
{
    string candidateBases;
    if (hasMismatches)
    {
        candidateBases = "AATTTTAAZ";
    }
    else
    {
        candidateBases = "AATTTTAAA";
    }

    var indelRead = new Read("chr", CreateAlignment("hasIndels", 0, "2M4I3M", "AATTTTAAA"));
    var candidate = new Read("chr", CreateAlignment("uniqCandidate", 0, "9M", candidateBases));
    candidate.BamAlignment.MapQuality = initialQuality;

    // Each extractor gets its own list containing the same two reads.
    var inputReads = new[] { indelRead, candidate };
    var realignSource = new MockExtractor(new List<Read>(inputReads));
    var candidateSource = new MockExtractor(new List<Read>(inputReads));

    // The candidate is expected to be re-aligned; the indel read is written untouched.
    var expectedRealigned = new List<string> { candidate.Name };
    var expectedUntouched = new List<string> { indelRead.Name };
    var mockWriter = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

    // Only remapped reads named like the candidate take part in the quality assertion.
    Action<IEnumerable<BamAlignment>> checkQuality = remapped =>
    {
        foreach (var alignment in remapped.Where(r => r.Name == candidate.Name))
        {
            Assert.Equal(expectedQuality, alignment.MapQuality);
        }
    };

    SetupExecute(
        realignSource,
        candidateSource,
        mockWriter,
        skipDups: true,
        maxShift: 50,
        chrReference: "AAAAAAAAAAAAAAAAAAAAAAAAAA",
        verifyRemappedReads: checkQuality,
        allowRescoringOrig0: allowRescoringOrig0);
}
// Builds a ChrRealigner over reference "chr" from the supplied extractors and writer, runs it,
// asserts that the writer saw as many reads as it expected, and finally hands the writer's
// remapped reads to the optional verification callback.
//   chrReference        - reference sequence; when null, "ACGT" repeated 10 times is used.
//   frequencyCutoff     - passed to the IndelTargetCaller constructor.
//   skipDups            - passed to the realigner as skipDuplicates.
//   skipAndRemove       - passed to the realigner as skipAndRemoveDuplicates.
//   maxShift            - passed to the realigner as maxRealignShift.
//   verifyRemappedReads - invoked with writer.RemappedReads after the count assertion, if given.
//   realignWindowSize   - passed to the RealignStateManager constructor.
private void SetupExecute(IAlignmentExtractor extractorForRealign, IAlignmentExtractor extractorForCandidates, MockRealignmentWriter writer, bool skipDups, int maxShift, string chrReference = null, float frequencyCutoff = 0, bool skipAndRemove = false, Action<IEnumerable<BamAlignment>> verifyRemappedReads = null, bool allowRescoringOrig0 = true, int realignWindowSize = 1000)
{
    var indelRanker = new Mock<IIndelRanker>();

    // Fall back to a 40-base synthetic reference when the caller did not supply one.
    var referenceSequence = chrReference;
    if (referenceSequence == null)
    {
        referenceSequence = string.Concat(Enumerable.Repeat("ACGT", 10));
    }

    var reference = new ChrReference
    {
        Name = "chr",
        Sequence = referenceSequence
    };
    var targetFinder = new IndelTargetFinder(0);
    var targetCaller = new IndelTargetCaller(frequencyCutoff);
    var stateManager = new RealignStateManager(realignWindowSize: realignWindowSize);

    // Note the argument order: the candidate extractor precedes the realign extractor.
    var chrRealigner = new ChrRealigner(
        reference,
        extractorForCandidates,
        extractorForRealign,
        targetFinder,
        indelRanker.Object,
        targetCaller,
        stateManager,
        writer,
        skipDuplicates: skipDups,
        skipAndRemoveDuplicates: skipAndRemove,
        maxRealignShift: maxShift,
        allowRescoringOrig0: allowRescoringOrig0);

    chrRealigner.Execute();

    Assert.Equal(writer.ReadsExpected, writer.ReadsWritten);

    if (verifyRemappedReads != null)
    {
        verifyRemappedReads(writer.RemappedReads);
    }
}
public void UpdateNMTag()
{
    // Checks the "NM" tag on remapped reads. The trailing integer passed to CreateAlignment is
    // presumably the read's initial NM value (confirm against CreateAlignment's signature).

    // should not be realigned and wrong NM will be kept
    var hasIndelsWrongNM = new Read("chr", CreateAlignment("hasIndelsWrongNM", 0, "2M4I3M", "AATTTTAAA", 2));

    var expectZeroMismatch = new Read("chr", CreateAlignment("expectZeroMismatch", 0, "9M", "AATTTTAAA", 1));
    var expectOneMismatch = new Read("chr", CreateAlignment("expectOneMismatch", 0, "9M", "AATTTTAAC", 2));

    // realignment will fix wrong NM
    var expectOneMismatchWrongNM = new Read("chr", CreateAlignment("ExpectOneMismatchWrongNM", 0, "9M", "AATTTTAAC", 0));

    // No NM tag to start with.
    // NOTE(review): the original comment said a new NM tag will be added, but the assertion
    // below expects the tag to be absent (null) - confirm which behavior is intended.
    var noNMTag = new Read("chr", CreateAlignment("noNMTag", 0, "9M", "AATTTTAAC"));

    var extractorForRealign = new MockExtractor(new List<Read>
    {
        hasIndelsWrongNM, expectZeroMismatch, expectOneMismatch, expectOneMismatchWrongNM, noNMTag
    });
    var extractorForCandidates = new MockExtractor(new List<Read>
    {
        hasIndelsWrongNM, expectZeroMismatch, expectOneMismatch, expectOneMismatchWrongNM, noNMTag
    });

    var writer = new MockRealignmentWriter(
        new List<string>
        {
            // reads that are expected to be re-aligned
            expectZeroMismatch.Name,
            expectOneMismatch.Name,
            expectOneMismatchWrongNM.Name,
            noNMTag.Name
        },
        new List<string>
        {
            // reads that are expected to be written but not re-aligned
            hasIndelsWrongNM.Name
        });

    SetupExecute(extractorForRealign, extractorForCandidates, writer, true, 50,
        allowRescoringOrig0: true,
        chrReference: "AAAAAAAAAAAAAAAAAAAAAAAAAA",
        verifyRemappedReads: (reads) =>
        {
            foreach (var read in reads)
            {
                if (read.Name == hasIndelsWrongNM.Name)
                {
                    Assert.Equal(2, read.GetIntTag("NM"));
                }

                // 4 and 5 below are presumably the four inserted bases plus zero or one
                // mismatch - confirm against the realigner's NM calculation.
                if (read.Name == expectZeroMismatch.Name)
                {
                    Assert.Equal(4, read.GetIntTag("NM"));
                }

                if (read.Name == expectOneMismatch.Name)
                {
                    Assert.Equal(5, read.GetIntTag("NM"));
                }

                if (read.Name == expectOneMismatchWrongNM.Name)
                {
                    Assert.Equal(5, read.GetIntTag("NM"));
                }

                if (read.Name == noNMTag.Name)
                {
                    // Assert.Null is the idiomatic null check (xUnit2003) and reports a
                    // clearer failure than Assert.Equal(null, ...).
                    Assert.Null(read.GetIntTag("NM"));
                }
            }
        });
}
public void Execute()
{
    // Runs the realigner three times over fresh copies of the same six reads, varying whether
    // duplicates are skipped and how large a realignment shift is allowed.
    const string softClippedCigar = "5S5M5I5M";
    const string softClippedBases = "ACGTACGTACTATATAATAC";

    var duplicate = new Read("chr", CreateAlignment("Duplicate", 5, softClippedCigar, softClippedBases));
    duplicate.BamAlignment.SetIsDuplicate(true);

    var nonPrimary = new Read("chr", CreateAlignment("NonPrimary", 5, softClippedCigar, softClippedBases));
    nonPrimary.BamAlignment.SetIsSecondaryAlignment(true);

    var supplementary = new Read("chr", CreateAlignment("Supplementary", 5, softClippedCigar, softClippedBases));
    supplementary.BamAlignment.SetIsSupplementaryAlignment(true);

    // This read is not flagged itself; it only carries an "SA" tag.
    var hasSupplementary = new Read("chr", CreateAlignment("HasSupplementary", 5, softClippedCigar, softClippedBases));
    var saTag = new TagUtils();
    saTag.AddStringTag("SA", "dummy");
    hasSupplementary.BamAlignment.AppendTagData(saTag.ToBytes());

    var passesSuspicion = new Read("chr", CreateAlignment("PassesSuspicion", 0, "4M", "ACGT"));
    var indelRead = new Read("chr", CreateAlignment("HasIndels", 5, softClippedCigar, softClippedBases));

    // Every run gets a brand-new extractor over fresh copies of the six reads.
    Func<MockExtractor> freshExtractor = () => new MockExtractor(new List<Read>
    {
        CopyRead(duplicate),
        CopyRead(nonPrimary),
        CopyRead(supplementary),
        CopyRead(hasSupplementary),
        CopyRead(passesSuspicion),
        CopyRead(indelRead)
    });

    // Run 1: max shift of 2 and dups not realigned. HasIndels shifts too far,
    // so nothing is expected to be re-aligned.
    var realignSource = freshExtractor();
    var mockWriter = new MockRealignmentWriter(
        new List<string>(),
        new List<string>
        {
            indelRead.Name,
            nonPrimary.Name,
            supplementary.Name,
            hasSupplementary.Name,
            duplicate.Name,
            passesSuspicion.Name
        });
    SetupMocksandExecute(realignSource, mockWriter, true, 2);

    // Run 2: allow realignment of dups and raise the max shift so HasIndels gets through.
    realignSource = freshExtractor();
    mockWriter = new MockRealignmentWriter(
        new List<string>
        {
            duplicate.Name,
            indelRead.Name
        },
        new List<string>
        {
            nonPrimary.Name,
            supplementary.Name,
            hasSupplementary.Name,
            passesSuspicion.Name
        });
    SetupMocksandExecute(realignSource, mockWriter, false, 50);

    // Run 3: dups are not allowed again, but the max shift is large enough for HasIndels.
    realignSource = freshExtractor();
    mockWriter = new MockRealignmentWriter(
        new List<string>
        {
            indelRead.Name,
        },
        new List<string>
        {
            duplicate.Name,
            nonPrimary.Name,
            supplementary.Name,
            hasSupplementary.Name,
            passesSuspicion.Name
        });
    SetupMocksandExecute(realignSource, mockWriter, true, 50);
}