Beispiel #1
0
      public void KeepOriginalIfAlignmentUnchanged()
      {
          // How a read is classified after a realignment attempt:
          //  - identical alignment                 -> not counted as realigned
          //  - different cigar, same position      -> counted as realigned
          //  - same cigar, different position      -> counted as realigned (not covered here; hard to reproduce)
          const string bases = "ZATTAAZ";

          var indelReadA        = new Read("chr", CreateAlignment("hasIndels", 1, "2M2I3M", bases));
          var indelReadB        = new Read("chr", CreateAlignment("hasIndels2", 1, "2M2I3M", bases));
          var sameStartNewCigar = new Read("chr", CreateAlignment("candidateForRealignmentGoesToSamePos", 1, "7M", bases));

          var allReads = new[] { indelReadA, indelReadB, sameStartNewCigar };

          // Each extractor gets its own list instance holding the same read objects.
          var realignSource   = new MockExtractor(new List<Read>(allReads));
          var candidateSource = new MockExtractor(new List<Read>(allReads));

          // Reads that are expected to be re-aligned.
          var expectedRealigned = new List<string> { sameStartNewCigar.Name };

          // Reads that are expected to be written but not re-aligned.
          var expectedUntouched = new List<string> { indelReadA.Name, indelReadB.Name };

          var writer = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

          SetupExecute(realignSource, candidateSource, writer,
                       skipDups: true,
                       maxShift: 50,
                       chrReference: "YZAAAZZZZZZZ");
      }
Beispiel #2
0
      public void CandidateExtractFilter()
      {
          // Candidate indels must not be taken from reads with a MapQ of zero or from secondary alignments.
          // Three reads carry a distinct 2-base insertion (GG, TT, CC); only the first is a regular read.
          var highMapQ      = new Read("chr", CreateAlignment("highMapQ", 0, "2M2I3M", "AAGGAAA"));
          var lowMapQ       = new Read("chr", CreateAlignment("lowMapQ", 0, "2M2I3M", "AATTAAA", mapq: 0));
          var secondaryRead = new Read("chr", CreateAlignment("secondaryRead", 0, "2M2I3M", "AACCAAA"));

          secondaryRead.BamAlignment.SetIsSecondaryAlignment(true);

          // One ungapped read per insertion above, carrying the same bases as the matching indel read.
          // Each read gets its own name because the writer expectations below are expressed by name only.
          var testIndel1 = new Read("chr", CreateAlignment("testIndel1", 0, "7M", "AAGGAAA"));
          var testIndel2 = new Read("chr", CreateAlignment("testIndel2", 0, "7M", "AATTAAA"));
          var testIndel3 = new Read("chr", CreateAlignment("testIndel3", 0, "7M", "AACCAAA"));

          var extractorForRealign = new MockExtractor(new List<Read>
            {
                highMapQ,
                lowMapQ,
                secondaryRead,
                testIndel1,
                testIndel2,
                testIndel3
            });
          var extractorForCandidates = new MockExtractor(new List<Read>
            {
                highMapQ,
                lowMapQ,
                secondaryRead,
                testIndel1,
                testIndel2,
                testIndel3
            });

          var writer = new MockRealignmentWriter(
              new List<string>
              {
                  // reads that are expected to be re-aligned:
                  // only the read matching the insertion seen in the regular (highMapQ) read
                  testIndel1.Name
              },
              new List<string>
              {
                  // reads that are expected to be written but not re-aligned
                  highMapQ.Name,
                  lowMapQ.Name,
                  secondaryRead.Name,
                  testIndel2.Name,
                  testIndel3.Name
              });

          SetupExecute(extractorForRealign, extractorForCandidates, writer, true, 50,
                       allowRescoringOrig0: true, chrReference: "AAAAAAAAAAAAAAAAAAAAAAAAAA");
      }
Beispiel #3
0
      public void SkipRemoveDuplicatesTrue_VariantBelowThreshold()
      {
          // Two reads carry the insertion: one unique read and one flagged as duplicate.
          // Duplicates are left out of the evidence counting in this setup, so the indel frequency is
          // 1 (unique read with indel) / 3 (unique reads in total) = 33.33%.
          var indelRead      = new Read("chr", CreateAlignment("hasIndels", 0, "2M1I3M", "AATAAA"));
          var referenceRead  = new Read("chr", CreateAlignment("uniqRef", 0, "6M", "AAAAAA"));
          var candidateRead  = new Read("chr", CreateAlignment("uniqCandidate", 0, "6M", "AATAAA"));
          var duplicateIndel = new Read("chr", CreateAlignment("dupWithIndels", 0, "2M1I3M", "AATAAA"));
          var duplicateRef   = new Read("chr", CreateAlignment("dubRef", 0, "6M", "AAAAAA"));

          // Flag the two duplicates.
          duplicateIndel.BamAlignment.SetIsDuplicate(true);
          duplicateRef.BamAlignment.SetIsDuplicate(true);

          var allReads = new[] { referenceRead, indelRead, candidateRead, duplicateIndel, duplicateRef };

          // Each extractor gets its own list instance holding the same read objects.
          var realignSource   = new MockExtractor(new List<Read>(allReads));
          var candidateSource = new MockExtractor(new List<Read>(allReads));

          // No read is expected to be re-aligned.
          var expectedRealigned = new List<string>();

          // Reads that are expected to be written but not re-aligned (the duplicates are not listed).
          var expectedUntouched = new List<string> { candidateRead.Name, indelRead.Name, referenceRead.Name };

          var writerBelowThreshold = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

          // With the cutoff at 0.34 (34%) the 33.33% indel stays below it, so no realignment is triggered.
          SetupExecute(realignSource, candidateSource, writerBelowThreshold,
                       skipDups: false,
                       maxShift: 50,
                       chrReference: "AAAAAAAAAAA",
                       frequencyCutoff: 0.34f,
                       skipAndRemove: true);
      }
Beispiel #4
0
      public void SkipDuplicatesFalse_VariantAboveThreshold()
      {
          // Two reads carry the insertion: one unique read and one flagged as duplicate.
          // Duplicates take part in the evidence counting in this setup, so the indel frequency is
          // 2 (unique + duplicate read with indel) / 5 (reads in total) = 40%.
          var indelRead      = new Read("chr", CreateAlignment("hasIndels", 0, "2M2I3M", "AATTAAA"));
          var referenceRead  = new Read("chr", CreateAlignment("uniqRef", 0, "7M", "AAAAAAA"));
          var candidateRead  = new Read("chr", CreateAlignment("uniqCandidate", 0, "7M", "AATTAAA"));
          var duplicateIndel = new Read("chr", CreateAlignment("dupWithIndels", 0, "2M2I3M", "AATTAAA"));
          var duplicateRef   = new Read("chr", CreateAlignment("dubRef", 0, "7M", "AAAAAAA"));

          // Flag the two duplicates.
          duplicateIndel.BamAlignment.SetIsDuplicate(true);
          duplicateRef.BamAlignment.SetIsDuplicate(true);

          var allReads = new[] { referenceRead, indelRead, candidateRead, duplicateIndel, duplicateRef };

          // Each extractor gets its own list instance holding the same read objects.
          var realignSource   = new MockExtractor(new List<Read>(allReads));
          var candidateSource = new MockExtractor(new List<Read>(allReads));

          // Only the ungapped candidate is expected to be re-aligned.
          var expectedRealigned = new List<string> { candidateRead.Name };

          // Everything else is expected to be written but not re-aligned.
          var expectedUntouched = new List<string>
          {
              indelRead.Name,
              duplicateIndel.Name,
              referenceRead.Name,
              duplicateRef.Name
          };

          var writer = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

          // The 40% indel frequency is above the 0.39 (39%) cutoff,
          // so realignment of the candidate read is triggered.
          SetupExecute(realignSource, candidateSource, writer,
                       skipDups: false,
                       maxShift: 50,
                       chrReference: "AAAAAAAAAAA",
                       frequencyCutoff: 0.39f);
      }
Beispiel #5
0
      /// <summary>
      /// Runs the realigner over one insertion-bearing read plus one ungapped candidate read and
      /// asserts the map quality of the candidate among the remapped reads.
      /// </summary>
      /// <param name="initialQuality">Map quality assigned to the candidate before the run.</param>
      /// <param name="hasMismatches">When true, the candidate sequence ends in 'Z' instead of 'A'.</param>
      /// <param name="expectedQuality">Map quality the remapped candidate is asserted to have.</param>
      /// <param name="allowRescoringOrig0">Forwarded unchanged to <c>SetupExecute</c>.</param>
      private void RealignAndCheckQuality(uint initialQuality, bool hasMismatches, uint expectedQuality, bool allowRescoringOrig0 = true)
      {
          var indelRead = new Read("chr", CreateAlignment("hasIndels", 0, "2M4I3M", "AATTTTAAA"));

          string candidateBases;
          if (hasMismatches)
          {
              candidateBases = "AATTTTAAZ";
          }
          else
          {
              candidateBases = "AATTTTAAA";
          }

          var candidate = new Read("chr", CreateAlignment("uniqCandidate", 0, "9M", candidateBases));
          candidate.BamAlignment.MapQuality = initialQuality;

          var bothReads = new[] { indelRead, candidate };

          // Each extractor gets its own list instance holding the same read objects.
          var realignSource   = new MockExtractor(new List<Read>(bothReads));
          var candidateSource = new MockExtractor(new List<Read>(bothReads));

          // Reads that are expected to be re-aligned.
          var expectedRealigned = new List<string> { candidate.Name };

          // Reads that are expected to be written but not re-aligned.
          var expectedUntouched = new List<string> { indelRead.Name };

          var writer = new MockRealignmentWriter(expectedRealigned, expectedUntouched);

          SetupExecute(realignSource, candidateSource, writer, true, 50,
                       allowRescoringOrig0: allowRescoringOrig0,
                       chrReference: "AAAAAAAAAAAAAAAAAAAAAAAAAA",
                       verifyRemappedReads: remapped =>
                       {
                           foreach (var alignment in remapped)
                           {
                               // Only the candidate's quality is checked; other reads are ignored.
                               if (alignment.Name != candidate.Name)
                               {
                                   continue;
                               }

                               Assert.Equal(expectedQuality, alignment.MapQuality);
                           }
                       });
      }
Beispiel #6
0
      /// <summary>
      /// Builds a <c>ChrRealigner</c> for chromosome "chr" from the supplied extractors and writer,
      /// executes it, and asserts that the writer saw as many reads as it expected.
      /// </summary>
      /// <param name="extractorForRealign">Extractor handed to the realigner as its realignment source.</param>
      /// <param name="extractorForCandidates">Extractor handed to the realigner as its candidate source.</param>
      /// <param name="writer">Mock writer whose ReadsExpected / ReadsWritten counters are compared after the run.</param>
      /// <param name="skipDups">Forwarded as <c>skipDuplicates</c>.</param>
      /// <param name="maxShift">Forwarded as <c>maxRealignShift</c>.</param>
      /// <param name="chrReference">Reference sequence; when null, "ACGT" repeated ten times is used.</param>
      /// <param name="frequencyCutoff">Passed to the <c>IndelTargetCaller</c> constructor.</param>
      /// <param name="skipAndRemove">Forwarded as <c>skipAndRemoveDuplicates</c>.</param>
      /// <param name="verifyRemappedReads">Optional callback invoked with the writer's remapped reads after the count check.</param>
      /// <param name="allowRescoringOrig0">Forwarded as <c>allowRescoringOrig0</c>.</param>
      /// <param name="realignWindowSize">Passed to the <c>RealignStateManager</c> constructor.</param>
      private void SetupExecute(IAlignmentExtractor extractorForRealign, IAlignmentExtractor extractorForCandidates, MockRealignmentWriter writer,
                                bool skipDups, int maxShift, string chrReference = null, float frequencyCutoff = 0, bool skipAndRemove = false, Action <IEnumerable <BamAlignment> > verifyRemappedReads = null, bool allowRescoringOrig0 = true, int realignWindowSize = 1000)
      {
          var rankerMock = new Mock<IIndelRanker>();

          var reference = new ChrReference
          {
              Name     = "chr",
              Sequence = chrReference ?? string.Join(string.Empty, Enumerable.Repeat("ACGT", 10))
          };

          // Note the argument order: the candidate extractor comes before the realign extractor.
          var realigner = new ChrRealigner(
              reference,
              extractorForCandidates,
              extractorForRealign,
              new IndelTargetFinder(0),
              rankerMock.Object,
              new IndelTargetCaller(frequencyCutoff),
              new RealignStateManager(realignWindowSize: realignWindowSize),
              writer,
              skipDuplicates: skipDups,
              skipAndRemoveDuplicates: skipAndRemove,
              maxRealignShift: maxShift,
              allowRescoringOrig0: allowRescoringOrig0);

          realigner.Execute();

          Assert.Equal(writer.ReadsExpected, writer.ReadsWritten);

          // Caller-specific checks run only after the read count has been verified.
          if (verifyRemappedReads != null)
          {
              verifyRemappedReads(writer.RemappedReads);
          }
      }
Beispiel #7
0
      public void UpdateNMTag()
      {
          // Checks the NM tag of reads after realignment against a 4-base insertion.
          // The last CreateAlignment argument, where present, is the NM value the read starts with.

          // should not be realigned and wrong NM will be kept
          var hasIndelsWrongNM   = new Read("chr", CreateAlignment("hasIndelsWrongNM", 0, "2M4I3M", "AATTTTAAA", 2));
          var expectZeroMismatch = new Read("chr", CreateAlignment("expectZeroMismatch", 0, "9M", "AATTTTAAA", 1));
          var expectOneMismatch  = new Read("chr", CreateAlignment("expectOneMismatch", 0, "9M", "AATTTTAAC", 2));
          // realignment will fix wrong NM
          var expectOneMismatchWrongNM = new Read("chr", CreateAlignment("ExpectOneMismatchWrongNM", 0, "9M", "AATTTTAAC", 0));
          // No NM tag to start with; the verification below expects the tag to still be absent after realignment
          var noNMTag = new Read("chr", CreateAlignment("noNMTag", 0, "9M", "AATTTTAAC"));

          var extractorForRealign = new MockExtractor(new List<Read>
            {
                hasIndelsWrongNM,
                expectZeroMismatch,
                expectOneMismatch,
                expectOneMismatchWrongNM,
                noNMTag
            });
          var extractorForCandidates = new MockExtractor(new List<Read>
            {
                hasIndelsWrongNM,
                expectZeroMismatch,
                expectOneMismatch,
                expectOneMismatchWrongNM,
                noNMTag
            });

          var writer = new MockRealignmentWriter(
              new List<string>
              {
                  // reads that are expected to be re-aligned
                  expectZeroMismatch.Name,
                  expectOneMismatch.Name,
                  expectOneMismatchWrongNM.Name,
                  noNMTag.Name
              },
              new List<string>
              {
                  // reads that are expected to be written but not re-aligned
                  hasIndelsWrongNM.Name
              });

          SetupExecute(extractorForRealign, extractorForCandidates, writer, true, 50,
                       allowRescoringOrig0: true,
                       chrReference: "AAAAAAAAAAAAAAAAAAAAAAAAAA", verifyRemappedReads: (reads) =>
            {
                foreach (var read in reads)
                {
                    // NOTE(review): this check only runs if the read shows up in the collection handed to this callback.
                    if (read.Name == hasIndelsWrongNM.Name)
                    {
                        Assert.Equal(2, read.GetIntTag("NM"));
                    }
                    // 4 inserted bases, no mismatch
                    if (read.Name == expectZeroMismatch.Name)
                    {
                        Assert.Equal(4, read.GetIntTag("NM"));
                    }
                    // 4 inserted bases plus one mismatch
                    if (read.Name == expectOneMismatch.Name)
                    {
                        Assert.Equal(5, read.GetIntTag("NM"));
                    }
                    if (read.Name == expectOneMismatchWrongNM.Name)
                    {
                        Assert.Equal(5, read.GetIntTag("NM"));
                    }
                    // a read that started without an NM tag is expected to still have none
                    if (read.Name == noNMTag.Name)
                    {
                        Assert.Null(read.GetIntTag("NM"));
                    }
                }
            });
      }
Beispiel #8
0
      public void Execute()
      {
          // Five of the six reads share the same soft-clipped insertion alignment and differ only in
          // name and flags/tags; the sixth is a short ungapped read.
          const string indelCigar = "5S5M5I5M";
          const string indelBases = "ACGTACGTACTATATAATAC";

          var dupRead = new Read("chr", CreateAlignment("Duplicate", 5, indelCigar, indelBases));
          dupRead.BamAlignment.SetIsDuplicate(true);

          var nonPrimaryRead = new Read("chr", CreateAlignment("NonPrimary", 5, indelCigar, indelBases));
          nonPrimaryRead.BamAlignment.SetIsSecondaryAlignment(true);

          var supplementaryRead = new Read("chr", CreateAlignment("Supplementary", 5, indelCigar, indelBases));
          supplementaryRead.BamAlignment.SetIsSupplementaryAlignment(true);

          // This read is not flagged itself but carries an "SA" string tag.
          var hasSupplementaryRead = new Read("chr", CreateAlignment("HasSupplementary", 5, indelCigar, indelBases));
          var saTag                = new TagUtils();
          saTag.AddStringTag("SA", "dummy");
          hasSupplementaryRead.BamAlignment.AppendTagData(saTag.ToBytes());

          var passesSuspicion = new Read("chr", CreateAlignment("PassesSuspicion", 0, "4M", "ACGT"));
          var hasIndels       = new Read("chr", CreateAlignment("HasIndels", 5, indelCigar, indelBases));

          // Every scenario below runs on fresh copies of these reads, in this order.
          var templates = new List<Read>
          {
              dupRead,
              nonPrimaryRead,
              supplementaryRead,
              hasSupplementaryRead,
              passesSuspicion,
              hasIndels
          };

          // Scenario 1: dups are not realigned and HasIndels would shift too far for a max shift of 2.
          // Nothing is expected to be re-aligned; all six reads are expected to be written unchanged.
          var strictExtractor = new MockExtractor(templates.ConvertAll<Read>(read => CopyRead(read)));
          var strictWriter    = new MockRealignmentWriter(
              new List<string>(),
              new List<string>
              {
                  hasIndels.Name,
                  nonPrimaryRead.Name,
                  supplementaryRead.Name,
                  hasSupplementaryRead.Name,
                  dupRead.Name,
                  passesSuspicion.Name
              });

          SetupMocksandExecute(strictExtractor, strictWriter, true, 2);

          // Scenario 2: allow realignment of dups, and increase max shift to let HasIndels through.
          var permissiveExtractor = new MockExtractor(templates.ConvertAll<Read>(read => CopyRead(read)));
          var permissiveWriter    = new MockRealignmentWriter(
              new List<string>
              {
                  dupRead.Name,
                  hasIndels.Name
              },
              new List<string>
              {
                  nonPrimaryRead.Name,
                  supplementaryRead.Name,
                  hasSupplementaryRead.Name,
                  passesSuspicion.Name
              });

          SetupMocksandExecute(permissiveExtractor, permissiveWriter, false, 50);

          // Scenario 3: don't allow dups, but keep max shift big enough for HasIndels.
          var noDupExtractor = new MockExtractor(templates.ConvertAll<Read>(read => CopyRead(read)));
          var noDupWriter    = new MockRealignmentWriter(
              new List<string>
              {
                  hasIndels.Name
              },
              new List<string>
              {
                  dupRead.Name,
                  nonPrimaryRead.Name,
                  supplementaryRead.Name,
                  hasSupplementaryRead.Name,
                  passesSuspicion.Name
              });

          SetupMocksandExecute(noDupExtractor, noDupWriter, true, 50);
      }