Beispiel #1
0
        /// <summary>
        /// Debug bookkeeping for a realignment that was accepted: bumps counters on
        /// <c>_statusCounter</c>, passes "RX" status text for <paramref name="bamAlignment"/> to it,
        /// and writes the original CIGAR ("OC") plus the original / realigned alignment summaries
        /// ("OS" / "RS") as string tags on <paramref name="bamAlignment"/>.
        /// </summary>
        /// <param name="origBamAlignment">Alignment before realignment; its CigarData is stored in the "OC" tag.</param>
        /// <param name="selectedIndels">Only tested for null; the result is reported as the "ps" flag.</param>
        /// <param name="existingIndels">Indels reported as "Orig indels"; may be null or empty.</param>
        /// <param name="realignResult">The accepted realignment whose details are reported.</param>
        /// <param name="bamAlignment">The alignment that receives the tags.</param>
        /// <param name="hasExistingUnsanctionedIndels">Reported verbatim in the "Replaced existing indels" counter name.</param>
        /// <param name="originalAlignmentSummary">Summary of the original alignment, serialized into the "OS" tag.</param>
        private void AddStatusInfo(BamAlignment origBamAlignment, List<PreIndel> selectedIndels, List<PreIndel> existingIndels,
                                   RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels,
                                   AlignmentSummary originalAlignmentSummary)
        {
            _statusCounter.AddStatusCount("INDEL STATUS\tAccepted\t" + realignResult.Indels);

            var hadSelectedIndels = selectedIndels != null;
            _statusCounter.AddStatusCount($"Successfully realigned (ps: {hadSelectedIndels})");

            var successMessage =
                $"Successfully realigned after {realignResult.Attempts} attempts, indel is {string.Join("|", realignResult.AcceptedIndels)}";
            _statusCounter.AppendStatusStringTag("RX", successMessage, bamAlignment);

            // Only report a replacement when the read already carried indels.
            if (existingIndels?.Any() == true)
            {
                var replacementMessage =
                    $"Orig indels:{string.Join("|", existingIndels)}__New indels:{realignResult.Indels}";
                _statusCounter.AppendStatusStringTag("RX", replacementMessage, bamAlignment);
                _statusCounter.AddStatusCount($"Replaced existing indels (nonsanctioned: {hasExistingUnsanctionedIndels})");
            }

            // OC: the CIGAR of the alignment before realignment.
            bamAlignment.ReplaceOrAddStringTag("OC", $"{origBamAlignment.CigarData}");

            // OS / RS: the same dash-separated metric layout for the original and the realigned alignment.
            var originalMetrics =
                $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q";
            bamAlignment.ReplaceOrAddStringTag("OS", originalMetrics);

            var realignedMetrics =
                $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q";
            bamAlignment.ReplaceOrAddStringTag("RS", realignedMetrics);
        }
Beispiel #2
0
        /// <summary>
        /// Ranks the original alignment against a realignment using the scores returned by
        /// <c>_alignmentScorer.GetAlignmentScore</c>. When <c>_debug</c> is set, a comma-separated
        /// line with both cigars, mismatch counts and scores is written to the console.
        /// </summary>
        /// <param name="originalAlignmentSummary">Summary of the alignment before realignment.</param>
        /// <param name="realignResult">Summary of the realigned alignment.</param>
        /// <returns>1 if the original scores higher, -1 if the realignment scores higher, 0 on a tie.</returns>
        public override int CompareAlignments(AlignmentSummary originalAlignmentSummary, AlignmentSummary realignResult)
        {
            var originalScore  = _alignmentScorer.GetAlignmentScore(originalAlignmentSummary);
            var realignedScore = _alignmentScorer.GetAlignmentScore(realignResult);

            if (_debug)
            {
                // Columns: original cigar, mismatches, score; realigned cigar, mismatches, score; realigned-is-better.
                var originalColumns  = originalAlignmentSummary.Cigar + "," + originalAlignmentSummary.NumMismatches + "," + originalScore;
                var realignedColumns = realignResult.Cigar + "," + realignResult.NumMismatches + "," + realignedScore;

                Console.WriteLine(originalColumns + "," + realignedColumns + "," + (realignedScore > originalScore));
            }

            return originalScore > realignedScore
                ? 1
                : (realignedScore > originalScore ? -1 : 0);
        }
Beispiel #3
0
        /// <summary>
        /// Applies an accepted realignment to <paramref name="bamAlignment"/>: overwrites its position
        /// and CIGAR with the realigned values, optionally records debug status info, may raise the
        /// mapping quality of a mismatch-free realignment, and zeroes the base qualities at every
        /// index listed in <c>realignResult.NifiedAt</c>.
        /// </summary>
        /// <param name="origBamAlignment">Alignment before realignment (only forwarded to the debug status info).</param>
        /// <param name="selectedIndels">Only tested for null; the result is reported as the "ps" flag.</param>
        /// <param name="existingIndels">Indels forwarded to the debug status info.</param>
        /// <param name="realignResult">The accepted realignment.</param>
        /// <param name="bamAlignment">The alignment that is modified in place.</param>
        /// <param name="hasExistingUnsanctionedIndels">Forwarded to the debug status info.</param>
        /// <param name="originalAlignmentSummary">Summary of the original alignment, forwarded to the debug status info.</param>
        private void HandleAcceptedRealignment(BamAlignment origBamAlignment, List <PreIndel> selectedIndels,
                                               List <PreIndel> existingIndels,
                                               RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels,
                                               AlignmentSummary originalAlignmentSummary)
        {
            // Reads at or below this mapping quality are candidates for having it raised.
            const int maxRescorableMapQuality = 20;
            // Mapping quality assigned to a rescored read. todo what to set this to?
            const int rescoredMapQuality = 40;

            bamAlignment.Position  = realignResult.Position - 1; // 0 base
            bamAlignment.CigarData = realignResult.Cigar;

            if (_lightDebug)
            {
                AddStatusInfo(origBamAlignment, selectedIndels, existingIndels, realignResult, bamAlignment, hasExistingUnsanctionedIndels, originalAlignmentSummary);
            }

            // NOTE(review): passes the read's current "RC" tag value back through the status counter; intent not visible from here.
            _statusCounter.AppendStatusStringTag("RC", bamAlignment.GetStringTag("RC"), bamAlignment);

            // Raise the mapping quality of a low-quality read whose realignment has no mismatches;
            // reads with mapping quality 0 are only rescored when _allowRescoringOrig0 is set.
            if (bamAlignment.MapQuality <= maxRescorableMapQuality && realignResult.NumMismatches == 0 &&
                (_allowRescoringOrig0 || bamAlignment.MapQuality > 0))
            {
                bamAlignment.MapQuality = rescoredMapQuality;
            }

            // Nify if using pair-specific indels
            if (realignResult.NifiedAt != null && realignResult.NifiedAt.Any())
            {
                foreach (var i in realignResult.NifiedAt)
                {
                    bamAlignment.Qualities[i] = 0;
                }

                _statusCounter.AddStatusCount(
                    $"Successfully realigned with mismatch-insertion quality adjusted (ps: {selectedIndels != null})");

                // Fix: the message previously opened a parenthesis before the index list but never closed it.
                _statusCounter.AppendStatusStringTag("RX",
                                                     $"Successfully realigned with mismatch-insertion quality adjusted ({string.Join(",", realignResult.NifiedAt)})",
                                                     bamAlignment);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Decides whether a realignment should be taken. An unchanged realignment is only taken when
        /// it has entries in <c>NifiedAt</c>; a changed realignment is taken when
        /// <c>_judger.RealignmentBetterOrEqual</c> accepts it. Rejections are recorded through
        /// <c>_statusCounter</c> against <paramref name="origBamAlignment"/>.
        /// </summary>
        /// <param name="realignResult">The candidate realignment.</param>
        /// <param name="origBamAlignment">The original alignment, used as the target for status tags.</param>
        /// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
        /// <param name="realignmentUnchanged">True when the realignment is the same as the original alignment.</param>
        /// <param name="isPairAware">Forwarded to the judger.</param>
        /// <returns>True when the realignment should be taken.</returns>
        private bool ResultIsGoodEnough(RealignmentResult realignResult, BamAlignment origBamAlignment,
                                        AlignmentSummary originalAlignmentSummary, bool realignmentUnchanged, bool isPairAware)
        {
            if (realignmentUnchanged)
            {
                // Guard against a null NifiedAt so an unchanged realignment without adjusted
                // positions is reported as "unchanged" instead of throwing.
                if (realignResult.NifiedAt != null && realignResult.NifiedAt.Any())
                {
                    return(true);
                }
                _statusCounter.AppendStatusStringTag("RX", "Not taking realignment: unchanged", origBamAlignment);
                _statusCounter.AddStatusCount("Not taking realignment: unchanged");
                return(false);
            }

            if (!_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, isPairAware))
            {
                // Record why the realignment lost, plus both metric summaries (OS = original, RS = realigned).
                _statusCounter.AppendStatusStringTag("RX", $"Realignment failed:not better ({originalAlignmentSummary.Cigar}->{realignResult.Cigar}): {realignResult.Conclusion}", origBamAlignment);
                _statusCounter.UpdateStatusStringTag("OS", $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q", origBamAlignment);
                _statusCounter.UpdateStatusStringTag("RS", $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q", origBamAlignment);

                _statusCounter.AddStatusCount("Not taking realignment: not better");
                return(false);
            }

            return(true);
        }
 /// <summary>
 /// Scores an alignment as the weighted sum of its mismatch count, indel count, indel base
 /// count, non-N softclip count and anchor length, each multiplied by its coefficient.
 /// </summary>
 /// <param name="summary">The alignment metrics to score.</param>
 /// <returns>The weighted sum.</returns>
 public int GetAlignmentScore(AlignmentSummary summary)
 {
     var score = MismatchCoefficient * summary.NumMismatches;
     score += IndelCoefficient * summary.NumIndels;
     score += IndelLengthCoefficient * summary.NumIndelBases;
     score += NonNSoftclipCoefficient * summary.NumNonNSoftclips;
     score += AnchorLengthCoefficient * summary.AnchorLength;

     return score;
 }
 /// <summary>
 /// Compares a candidate alignment with the original. With <paramref name="treatKindly"/> set,
 /// a candidate that has at most one mismatch and no more mismatches (softclips included) than
 /// the original is accepted outright; otherwise the decision is delegated to
 /// <c>CompareAlignmentsWithOriginal2</c>.
 /// </summary>
 /// <param name="other">The candidate alignment.</param>
 /// <param name="original">The original alignment.</param>
 /// <param name="treatKindly">Enables the lenient shortcut described above.</param>
 /// <returns>1 when the lenient shortcut applies; otherwise the result of <c>CompareAlignmentsWithOriginal2</c>.</returns>
 public override int CompareAlignmentsWithOriginal(AlignmentSummary other, AlignmentSummary original,
                                                   bool treatKindly = false)
 {
     var acceptedKindly = treatKindly
                          && other.NumMismatches <= 1
                          && other.NumMismatchesIncludeSoftclip <= original.NumMismatchesIncludeSoftclip;

     return acceptedKindly ? 1 : CompareAlignmentsWithOriginal2(other, original);
 }
Beispiel #7
0
        /// <summary>
        /// Compares a realignment with the original alignment using the scores returned by
        /// <c>_alignmentScorer.GetAlignmentScore</c>.
        /// </summary>
        /// <param name="realignResult">Summary of the realigned alignment.</param>
        /// <param name="originalAlignmentSummary">Summary of the alignment before realignment.</param>
        /// <returns>1 if the realignment scores higher, -1 if the original scores higher, 0 on a tie.</returns>
        public override int CompareAlignmentsWithOriginal(AlignmentSummary realignResult, AlignmentSummary originalAlignmentSummary)
        {
            var originalScore  = _alignmentScorer.GetAlignmentScore(originalAlignmentSummary);
            var realignedScore = _alignmentScorer.GetAlignmentScore(realignResult);

            return realignedScore > originalScore
                ? 1
                : (originalScore > realignedScore ? -1 : 0);
        }
Beispiel #8
0
        /// <summary>
        /// Updates the per-indel outcome counters in <c>_indelOutcomes</c> for the first
        /// <paramref name="numIndels"/> entries of <paramref name="orderedIndels"/>, creating a
        /// counter array for any indel not seen before. Accepted indels of a realignment that the
        /// judger considers very confident are also added to <paramref name="confirmedAcceptedIndels"/>.
        /// </summary>
        /// <param name="numIndels">Number of leading entries of <paramref name="orderedIndels"/> to process.</param>
        /// <param name="orderedIndels">The indels that were tried, in rank order.</param>
        /// <param name="hasAnyIndels">When false, every indel is counted as a failure.</param>
        /// <param name="acceptedIndels">Indexes into <paramref name="orderedIndels"/> that were accepted; may be null.</param>
        /// <param name="confirmedAcceptedIndels">Optional collector for very confident accepted indels; may be null.</param>
        /// <param name="acceptedRealignment">Whether the realignment as a whole was accepted.</param>
        /// <param name="realignResult">The realignment passed to the judger's confidence check.</param>
        private void UpdateIndelOutcomes(int numIndels, List <HashableIndel> orderedIndels, bool hasAnyIndels,
                                         List <int> acceptedIndels, List <HashableIndel> confirmedAcceptedIndels, bool acceptedRealignment,
                                         AlignmentSummary realignResult)
        {
            // Layout of the per-indel outcome array:
            // success, failure, Rank, numIndels, multis, confirmed, acceptRealn, otherAccepted
            // (the "confirmed" slot, index 5, is not written by this method).
            const int outcomeSlotCount  = 8;
            const int successSlot       = 0;
            const int failureSlot       = 1;
            const int rankSlot          = 2;
            const int numIndelsSlot     = 3;
            const int multisSlot        = 4;
            const int acceptRealnSlot   = 6;
            const int otherAcceptedSlot = 7;

            // Loop-invariant: 1 when more than one indel was accepted together, otherwise 0.
            var multiIndelIncrement = acceptedIndels?.Count > 1 ? 1 : 0;

            for (int i = 0; i < numIndels; i++)
            {
                var indel = orderedIndels[i];

                int[] outcomesForIndel;

                if (!_indelOutcomes.TryGetValue(indel, out outcomesForIndel))
                {
                    outcomesForIndel = new int[outcomeSlotCount];
                    _indelOutcomes.Add(indel, outcomesForIndel);
                }

                // acceptedIndels is nullable (see the ?. above), so guard it here as well
                // rather than throwing when hasAnyIndels is true but no list was supplied.
                var indelWasAccepted = hasAnyIndels && acceptedIndels != null && acceptedIndels.Contains(i);

                if (indelWasAccepted)
                {
                    outcomesForIndel[successSlot]++;
                    outcomesForIndel[rankSlot] += i + 1; // rank is 1-based

                    if (acceptedRealignment)
                    {
                        outcomesForIndel[acceptRealnSlot]++;
                    }

                    var realignmentIsVeryConfident = _judger.IsVeryConfident(realignResult);

                    if (realignmentIsVeryConfident)
                    {
                        confirmedAcceptedIndels?.Add(indel);
                    }
                }
                else
                {
                    outcomesForIndel[failureSlot]++;

                    if (acceptedRealignment)
                    {
                        // The realignment was accepted, but with other indels than this one.
                        outcomesForIndel[otherAcceptedSlot]++;
                    }
                }

                outcomesForIndel[numIndelsSlot] += numIndels;
                outcomesForIndel[multisSlot]    += multiIndelIncrement;
            }
        }
        /// <summary>
        /// Compares two alignment summaries; a positive result favors <paramref name="original"/>.
        /// The rules are applied in this order:
        /// - A null <paramref name="other"/> always loses.
        /// - An alignment with exactly one mismatch and no indels beats one with more than one indel.
        /// - Fewer mismatches wins, regardless of number of indels.
        /// - Given the same number of mismatches, fewer non-N softclips wins (0 better than 1, 1 better than 2).
        /// - Given the same number of mismatches and softclips, fewer indels wins (0 better than 1, 1 better than 2).
        ///
        /// Leaving softclips and the single-mismatch rule aside, this maps to the following
        /// scenarios (original relative to other, written out to be explicit):
        /// indels =, mismatch =, 0
        /// indels =, mismatch &lt;, 1
        /// indels =, mismatch &gt;, -1
        /// indels &lt;, mismatch =, 1
        /// indels &lt;, mismatch &lt;, 1
        /// indels &lt;, mismatch &gt;, -1
        /// indels &gt;, mismatch =, -1
        /// indels &gt;, mismatch &lt;, 1
        /// indels &gt;, mismatch &gt;, -1
        /// </summary>
        /// <param name="original">The alignment being ranked; must not be null.</param>
        /// <param name="other">The alignment to rank against; may be null.</param>
        /// <returns>1 if <paramref name="original"/> is preferred, -1 if <paramref name="other"/> is preferred, 0 if they are equivalent.</returns>
        public override int CompareAlignments(AlignmentSummary original, AlignmentSummary other)
        {
            if (other == null)
            {
                return 1;
            }

            // Special rule: a lone mismatch (no indels) is preferred over explaining the read with several indels.
            var originalIsLoneMismatch = original.NumMismatches == 1 && original.NumIndels == 0;
            if (originalIsLoneMismatch && other.NumIndels > 1)
            {
                return 1;
            }

            var otherIsLoneMismatch = other.NumMismatches == 1 && other.NumIndels == 0;
            if (otherIsLoneMismatch && original.NumIndels > 1)
            {
                return -1;
            }

            // Tie-break cascade: mismatches, then non-N softclips, then indels; fewer is better each time.
            if (original.NumMismatches != other.NumMismatches)
            {
                return original.NumMismatches < other.NumMismatches ? 1 : -1;
            }

            if (original.NumNonNSoftclips != other.NumNonNSoftclips)
            {
                return original.NumNonNSoftclips < other.NumNonNSoftclips ? 1 : -1;
            }

            if (original.NumIndels != other.NumIndels)
            {
                return original.NumIndels < other.NumIndels ? 1 : -1;
            }

            return 0;
        }
Beispiel #10
0
        /// <summary>
        /// Reports whether the original alignment is clean enough to be left alone. It passes only
        /// when it has no mismatches (softclips included) and no indels, and it is not a
        /// softclipped read that <c>_tryRealignCleanSoftclippedReads</c> asks to be realigned.
        /// </summary>
        /// <param name="originalResult">Summary of the original alignment.</param>
        /// <returns>True when the alignment raises no suspicion.</returns>
        private bool PassesSuspicion(AlignmentSummary originalResult)
        {
            var softclipNeedsRealignment = _tryRealignCleanSoftclippedReads && originalResult.NumNonNSoftclips > 0;
            if (softclipNeedsRealignment)
            {
                return false;
            }

            // Anything with a mismatch or an indel still needs to be tried against one of the priors.
            // jg todo: consider a configurable anchor-size threshold (MinNumAnchorMatches) for reads
            // whose only problem is mismatches at the tail end.
            return originalResult.NumMismatchesIncludeSoftclip == 0 && originalResult.NumIndels == 0;
        }
        /// <summary>
        /// Checks that <c>GetAlignmentScore</c> weights each alignment metric by its coefficient:
        /// all coefficients default to zero, and each one contributes independently once set.
        /// </summary>
        public void AlignmentScorer()
        {
            var defaultScorer = new AlignmentScorer();

            var perfect             = new AlignmentSummary();
            var oneIndel            = new AlignmentSummary { NumIndels = 1 };
            var twoIndels           = new AlignmentSummary { NumIndels = 2 };
            var oneMismatch         = new AlignmentSummary { NumMismatches = 1 };
            var twoMismatches       = new AlignmentSummary { NumMismatches = 2 };
            var oneIndelOneMismatch = new AlignmentSummary { NumIndels = 1, NumMismatches = 1 };
            var everything          = new AlignmentSummary
            {
                NumIndels        = 1,
                NumMismatches    = 1,
                NumIndelBases    = 1,
                NumNonNSoftclips = 1,
                AnchorLength     = 1
            };

            // By default, everything is 0
            foreach (var summary in new[] { perfect, oneIndel, twoIndels, oneIndelOneMismatch })
            {
                Assert.Equal(0, defaultScorer.GetAlignmentScore(summary));
            }

            // Count against mismatches: -1 score for each
            var mismatchScorer = new AlignmentScorer { MismatchCoefficient = -1 };
            Assert.Equal(-1, mismatchScorer.GetAlignmentScore(oneMismatch));
            Assert.Equal(-1, mismatchScorer.GetAlignmentScore(oneIndelOneMismatch));
            Assert.Equal(-2, mismatchScorer.GetAlignmentScore(twoMismatches));
            Assert.Equal(0, mismatchScorer.GetAlignmentScore(oneIndel));
            Assert.Equal(0, mismatchScorer.GetAlignmentScore(twoIndels));

            // Count against indels: -1 score for each
            var indelScorer = new AlignmentScorer { IndelCoefficient = -1 };
            Assert.Equal(0, indelScorer.GetAlignmentScore(oneMismatch));
            Assert.Equal(-1, indelScorer.GetAlignmentScore(oneIndelOneMismatch));
            Assert.Equal(0, indelScorer.GetAlignmentScore(twoMismatches));
            Assert.Equal(-1, indelScorer.GetAlignmentScore(oneIndel));
            Assert.Equal(-2, indelScorer.GetAlignmentScore(twoIndels));

            // Count against indels and mismatches
            var combinedScorer = new AlignmentScorer { IndelCoefficient = -3, MismatchCoefficient = -1 };
            Assert.Equal(-1, combinedScorer.GetAlignmentScore(oneMismatch));
            Assert.Equal(-4, combinedScorer.GetAlignmentScore(oneIndelOneMismatch));
            Assert.Equal(-2, combinedScorer.GetAlignmentScore(twoMismatches));
            Assert.Equal(-3, combinedScorer.GetAlignmentScore(oneIndel));
            Assert.Equal(-6, combinedScorer.GetAlignmentScore(twoIndels));

            // Make sure the other stuff is working: switch the coefficients on one at a time,
            // each adding exactly 1 to the score of the summary that has every metric set to 1.
            var cumulativeScorer = new AlignmentScorer { IndelLengthCoefficient = 1 };
            Assert.Equal(1, cumulativeScorer.GetAlignmentScore(everything));

            cumulativeScorer.IndelCoefficient = 1;
            Assert.Equal(2, cumulativeScorer.GetAlignmentScore(everything));

            cumulativeScorer.MismatchCoefficient = 1;
            Assert.Equal(3, cumulativeScorer.GetAlignmentScore(everything));

            cumulativeScorer.NonNSoftclipCoefficient = 1;
            Assert.Equal(4, cumulativeScorer.GetAlignmentScore(everything));

            cumulativeScorer.AnchorLengthCoefficient = 1;
            Assert.Equal(5, cumulativeScorer.GetAlignmentScore(everything));
        }
Beispiel #12
0
        /// <summary>
        /// Exercises <c>BasicAlignmentComparer.CompareAlignments</c>: the null case, the nine
        /// indel/mismatch combinations, the single-mismatch-versus-multiple-indels special rule,
        /// and the softclip tie-breaker. A result of 1 favors the first argument, -1 the second.
        /// </summary>
        public void CompareAlignments()
        {
            var comparer = new BasicAlignmentComparer();

            var perfect      = new AlignmentSummary();
            var otherPerfect = new AlignmentSummary();
            var oneMismatch  = new AlignmentSummary();

            oneMismatch.NumMismatches = 1;
            var oneIndel = new AlignmentSummary();

            oneIndel.NumIndels = 1;
            var oneIndelOneMismatch = new AlignmentSummary();

            oneIndelOneMismatch.NumMismatches = 1;
            oneIndelOneMismatch.NumIndels     = 1;

            Assert.Equal(1, comparer.CompareAlignments(perfect, null));

            // indels =, mismatch =, 0
            Assert.Equal(0, comparer.CompareAlignments(perfect, otherPerfect));

            // indels =, mismatch <, 1
            Assert.Equal(1, comparer.CompareAlignments(perfect, oneMismatch));

            // indels =, mismatch < (both with one indel), 1
            Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneIndelOneMismatch));

            // indels =, mismatch >, -1
            Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, perfect));

            // indels <, mismatch =, 1
            Assert.Equal(1, comparer.CompareAlignments(perfect, oneIndel));

            // indels <, mismatch <, 1
            // (previously compared oneIndel with oneIndelOneMismatch, which has equal indel counts)
            Assert.Equal(1, comparer.CompareAlignments(perfect, oneIndelOneMismatch));

            // indels <, mismatch >, -1
            Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, oneIndel));

            // indels >, mismatch =, -1
            Assert.Equal(-1, comparer.CompareAlignments(oneIndel, perfect));

            // indels >, mismatch <, 1
            // (previously compared oneIndel with oneIndelOneMismatch, which has equal indel counts)
            Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneMismatch));

            // indels >, mismatch >, -1
            Assert.Equal(-1, comparer.CompareAlignments(oneIndelOneMismatch, perfect));

            var twoIndels = new AlignmentSummary()
            {
                NumIndels = 2, NumMismatches = 0
            };
            var twoMismatches = new AlignmentSummary()
            {
                NumIndels = 0, NumMismatches = 2
            };

            // 1 mismatch and 0 indels in first, 2 indels and 0 mismatches in second, favor the first
            Assert.Equal(1, comparer.CompareAlignments(oneMismatch, twoIndels));

            // 1 mismatch and 0 indels in first, 1 indel in second, favor the second
            Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, oneIndel));

            // 2 mismatches and 0 indels in first, 2 indels in second, favor the second -- special rule only applies to single-mismatch reads (with no indels)
            Assert.Equal(-1, comparer.CompareAlignments(twoMismatches, twoIndels));

            // 1 mismatch and 1 indel in first, 2 indels and 0 mismatches in second, favor the second -- special rule only applies to single-mismatch reads (with no indels)
            Assert.Equal(-1, comparer.CompareAlignments(oneIndelOneMismatch, twoIndels));

            var oneMismatchOneSoftclip = new AlignmentSummary()
            {
                NumNonNSoftclips = 1, NumMismatches = 1
            };
            var oneSoftclip = new AlignmentSummary()
            {
                NumNonNSoftclips = 1
            };
            var oneIndelOneSoftclip = new AlignmentSummary()
            {
                NumIndels = 1, NumNonNSoftclips = 1
            };

            // 1 mismatch and 1 softclip, 0 mismatch and 1 softclip, favor the second
            Assert.Equal(-1, comparer.CompareAlignments(oneMismatchOneSoftclip, oneSoftclip));

            // 1 mismatch and 0 softclip, 0 mismatch and 1 softclip, favor the second
            Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, oneSoftclip));

            // 0 mismatch and 0 softclip, 0 mismatch and 1 softclip, favor the first
            Assert.Equal(1, comparer.CompareAlignments(perfect, oneSoftclip));

            // 1 indel and 0 mismatch/sc, 1 indel and 1 softclip, favor the first
            Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneIndelOneSoftclip));

            // 2 indel and 0 mismatch/sc, 1 indel and 1 softclip, favor the first
            Assert.Equal(1, comparer.CompareAlignments(twoIndels, oneIndelOneSoftclip));

            // 1 indel and 0 mismatch/sc, 0 indel and 1 softclip, favor the first
            Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneSoftclip));

            // 2 indel and 0 mismatch/sc, 0 indel and 1 softclip, favor the first
            Assert.Equal(1, comparer.CompareAlignments(twoIndels, oneSoftclip));
        }
Beispiel #13
0
        public void CompareAlignmentsWithOriginal()
        {
            var comparer = new BasicAlignmentComparer();

            var perfect      = new AlignmentSummary();
            var otherPerfect = new AlignmentSummary();
            var oneMismatch  = new AlignmentSummary();

            oneMismatch.NumMismatchesIncludeSoftclip = 1;
            var oneIndel = new AlignmentSummary();

            oneIndel.NumIndels = 1;
            var oneIndel2 = new AlignmentSummary();

            oneIndel2.NumIndels = 1;
            var oneIndelOneMismatch = new AlignmentSummary();

            oneIndelOneMismatch.NumMismatchesIncludeSoftclip = 1;
            oneIndelOneMismatch.NumIndels = 1;
            var twoIndels = new AlignmentSummary()
            {
                NumIndels = 2, NumMismatchesIncludeSoftclip = 0
            };
            var twoIndels2 = new AlignmentSummary()
            {
                NumIndels = 2, NumMismatchesIncludeSoftclip = 0
            };
            var twoMismatches = new AlignmentSummary()
            {
                NumIndels = 0, NumMismatchesIncludeSoftclip = 2
            };

            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(perfect, null));

            // --------------
            // realignment has zero mismatch
            // --------------
            // both perfect, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(perfect, otherPerfect));

            // indels both 0, mismatch smaller by 1, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(perfect, oneMismatch));

            // special rule for one indel vs. one mismatch , pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(oneIndel, oneMismatch));

            // gain one indel, mismatch both 0, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(oneIndel, perfect));

            // gain one indel, mismatch smaller by 2, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(oneIndel, twoMismatches));

            // indels both 1, mismatch both 0, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(oneIndel, oneIndel2));

            // indels both 1, mismatch smaller by 1, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(oneIndel, oneIndelOneMismatch));

            // special rule doesn't apply to two indels, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, oneMismatch));

            // gain two indels, mismatch both 0, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(twoIndels, perfect));

            // gain two indels, mismatch smaller by 2, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, twoMismatches));

            // indels both 2, mismatch both 0, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, twoIndels2));

            // gain one indel, mismatch smaller by 1, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, oneIndelOneMismatch));

            // gain one indel, mismatch both 0, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, oneIndel));



            // --------------
            // realignment has >=1 mismatch
            // --------------

            // --------------
            // short indels <= 3bp
            // --------------
            var shortIndelOneMismatchNew = new AlignmentSummary()
            {
                NumIndelBases = 3, NumIndels = 1, NumMismatchesIncludeSoftclip = 1, MismatchesIncludeSoftclip = new List <string> {
                    "5_A_C"
                }
            };
            var shortIndelOneMismatchShared = new AlignmentSummary()
            {
                NumIndelBases = 3, NumIndels = 1, NumMismatchesIncludeSoftclip = 1, MismatchesIncludeSoftclip = new List <string> {
                    "3_A_C"
                }
            };
            var zeroIndelWithFourMismatch = new AlignmentSummary()
            {
                NumMismatchesIncludeSoftclip = 4, MismatchesIncludeSoftclip = new List <string> {
                    "0_A_C", "1_A_C", "2_A_C", "3_A_C"
                }
            };
            var zeroIndelWithThreeMismatch = new AlignmentSummary()
            {
                NumMismatchesIncludeSoftclip = 3, MismatchesIncludeSoftclip = new List <string> {
                    "1_A_C", "2_A_C", "3_A_C"
                }
            };

            // realignment introduced a new mismatch, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(shortIndelOneMismatchNew, zeroIndelWithFourMismatch));

            // the one mismatch exists in both original and realignment, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(shortIndelOneMismatchShared, zeroIndelWithFourMismatch));

            // reduction of mismatch < 3, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(shortIndelOneMismatchShared, zeroIndelWithThreeMismatch));


            // --------------
            // indels > 3bp
            // --------------
            var MediumIndelOneMismatchNew = new AlignmentSummary()
            {
                NumIndelBases = 4, NumIndels = 1, NumMismatchesIncludeSoftclip = 1, MismatchesIncludeSoftclip = new List <string> {
                    "5_A_C"
                }
            };
            var MediumIndelOneMismatchShared = new AlignmentSummary()
            {
                NumIndelBases = 4, NumIndels = 1, NumMismatchesIncludeSoftclip = 1, MismatchesIncludeSoftclip = new List <string> {
                    "3_A_C"
                }
            };
            var MediumIndelTwoMismatchBothNew = new AlignmentSummary()
            {
                NumIndelBases = 4, NumIndels = 1, NumMismatchesIncludeSoftclip = 2, MismatchesIncludeSoftclip = new List <string> {
                    "5_A_C", "6_A_C"
                }
            };
            var MediumIndelTwoMismatchOneShared = new AlignmentSummary()
            {
                NumIndelBases = 4, NumIndels = 1, NumMismatchesIncludeSoftclip = 2, MismatchesIncludeSoftclip = new List <string> {
                    "2_A_C", "5_A_C"
                }
            };
            var zeroIndelWithFiveMismatch = new AlignmentSummary()
            {
                NumMismatchesIncludeSoftclip = 5, MismatchesIncludeSoftclip = new List <string> {
                    "0_A_C", "1_A_C", "2_A_C", "3_A_C", "4_A_C"
                }
            };

            // mismatch smaller by 2, realignment has one mismatch, shared with original, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(MediumIndelOneMismatchShared, zeroIndelWithThreeMismatch));

            // mismatch smaller by 3, realignment has one mismatch, shared with original, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(MediumIndelOneMismatchShared, zeroIndelWithFourMismatch));

            // mismatch smaller by 2, realignment created 1 new mismatch, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(MediumIndelOneMismatchNew, zeroIndelWithThreeMismatch));

            // mismatch smaller by 3, realignment created 1 new mismatch, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(MediumIndelOneMismatchNew, zeroIndelWithFourMismatch));

            // mismatch smaller by 3, but realignment created 2 new mismatches, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(MediumIndelTwoMismatchBothNew, zeroIndelWithFiveMismatch));

            // mismatch smaller by 3, realignment has two mismatches, one introduced by indel, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(MediumIndelTwoMismatchOneShared, zeroIndelWithFiveMismatch));

            // mismatch smaller by 2, realignment has two mismatches, one introduced by indel, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(MediumIndelTwoMismatchOneShared, zeroIndelWithFourMismatch));


            var LongIndelOneMismatchShared = new AlignmentSummary()
            {
                NumIndelBases = 9, NumIndels = 1, NumMismatchesIncludeSoftclip = 1, MismatchesIncludeSoftclip = new List <string> {
                    "5_A_C"
                }
            };
            var LongIndelTwoMismatchOneShared = new AlignmentSummary()
            {
                NumIndelBases = 9, NumIndels = 1, NumMismatchesIncludeSoftclip = 2, MismatchesIncludeSoftclip = new List <string> {
                    "2_A_C", "5_A_C"
                }
            };
            var LongIndelTwoMismatchBothNew = new AlignmentSummary()
            {
                NumIndelBases = 9, NumIndels = 1, NumMismatchesIncludeSoftclip = 2, MismatchesIncludeSoftclip = new List <string> {
                    "5_A_C", "6_A_C"
                }
            };

            var HighFrequencyIndelOneMismatchShared = new AlignmentSummary()
            {
                NumIndelBases = 4, NumIndels = 1, HasHighFrequencyIndel = true, NumMismatchesIncludeSoftclip = 1, MismatchesIncludeSoftclip = new List <string> {
                    "5_A_C"
                }
            };
            var HighFrequencyIndelTwoMismatchOneShared = new AlignmentSummary()
            {
                NumIndelBases = 4, NumIndels = 1, HasHighFrequencyIndel = true, NumMismatchesIncludeSoftclip = 2, MismatchesIncludeSoftclip = new List <string> {
                    "2_A_C", "5_A_C"
                }
            };
            var HighFrequencyIndelTwoMismatchBothNew = new AlignmentSummary()
            {
                NumIndelBases = 9, NumIndels = 1, NumMismatchesIncludeSoftclip = 2, MismatchesIncludeSoftclip = new List <string> {
                    "5_A_C", "6_A_C"
                }
            };

            var zeroIndelWithTwoMismatch = new AlignmentSummary()
            {
                NumMismatchesIncludeSoftclip = 2, MismatchesIncludeSoftclip = new List <string> {
                    "1_A_C", "2_A_C"
                }
            };


            // mismatch smaller by 2 (3->1), realignment has one mismatch, shared with original, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(LongIndelOneMismatchShared, zeroIndelWithThreeMismatch));

            // mismatch smaller by 1 (2->1), realignment has one mismatch, shared with original, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(LongIndelOneMismatchShared, zeroIndelWithTwoMismatch));

            // mismatch smaller by 2 (4->2), realignment has two mismatches, one introduced by indel, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(LongIndelTwoMismatchOneShared, zeroIndelWithFourMismatch));

            // mismatch smaller by 1 (3->2), realignment has two mismatches, one introduced by indel, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(LongIndelTwoMismatchOneShared, zeroIndelWithThreeMismatch));

            // mismatch smaller by 2 (4->2), but realignment created 2 new mismatches, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(LongIndelTwoMismatchBothNew, zeroIndelWithFourMismatch));

            // mismatch smaller by 2 (3->1), realignment has one mismatch, shared with original, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(HighFrequencyIndelOneMismatchShared, zeroIndelWithThreeMismatch));

            // mismatch smaller by 1 (2->1), realignment has one mismatch, shared with original, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(HighFrequencyIndelOneMismatchShared, zeroIndelWithTwoMismatch));

            // mismatch smaller by 2 (4->2), realignment has two mismatches, one introduced by indel, pick new
            Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(HighFrequencyIndelTwoMismatchOneShared, zeroIndelWithFourMismatch));

            // mismatch smaller by 1 (3->2), realignment has two mismatches, one introduced by indel, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(HighFrequencyIndelTwoMismatchOneShared, zeroIndelWithThreeMismatch));

            // mismatch smaller by 2 (4->2), but realignment created 2 new mismatches, pick original
            Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(HighFrequencyIndelTwoMismatchBothNew, zeroIndelWithFourMismatch));
        }
 /// <summary>
 /// Compares two alignment summaries and returns a signed integer verdict.
 /// </summary>
 /// <param name="preferred">First summary in the comparison.</param>
 /// <param name="other">Second summary in the comparison.</param>
 /// <returns>
 /// A signed integer. NOTE(review): the test call sites visible alongside this declaration pass the
 /// realigned summary first and the original second, and expect 1 for "pick new" and -1 for
 /// "pick original" - confirm that every implementation follows that convention.
 /// </returns>
 public abstract int CompareAlignmentsWithOriginal(AlignmentSummary preferred, AlignmentSummary other);
Beispiel #15
0
        /// <summary>
        /// Finalizes an accepted realignment: hands all bookkeeping to
        /// <c>HandleAcceptedRealignment</c> and reports the read as changed but not confirmed.
        /// </summary>
        /// <param name="origBamAlignment">Alignment as it was before realignment.</param>
        /// <param name="changed">Always set to true.</param>
        /// <param name="selectedIndels">Forwarded unchanged to <c>HandleAcceptedRealignment</c>.</param>
        /// <param name="existingIndels">Forwarded unchanged to <c>HandleAcceptedRealignment</c>.</param>
        /// <param name="realignResult">Forwarded unchanged to <c>HandleAcceptedRealignment</c>.</param>
        /// <param name="originalAlignmentSummary">Forwarded unchanged to <c>HandleAcceptedRealignment</c>.</param>
        /// <param name="bamAlignment">The realigned alignment; this is the value returned.</param>
        /// <param name="hasExistingUnsanctionedIndels">Forwarded unchanged to <c>HandleAcceptedRealignment</c>.</param>
        /// <param name="confirmed">Always set to false.</param>
        /// <returns><paramref name="bamAlignment"/>.</returns>
        private BamAlignment AcceptRealignment(BamAlignment origBamAlignment, out bool changed, List <PreIndel> selectedIndels,
                                               List <PreIndel> existingIndels, RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary,
                                               BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, out bool confirmed)
        {
            // Bookkeeping runs first so that the out parameters are only written once it has succeeded.
            HandleAcceptedRealignment(
                origBamAlignment,
                selectedIndels,
                existingIndels,
                realignResult,
                bamAlignment,
                hasExistingUnsanctionedIndels,
                originalAlignmentSummary);

            changed   = true;
            confirmed = false;

            return bamAlignment;
        }
Beispiel #16
0
 /// <summary>
 /// Compares two alignment summaries and returns a signed integer verdict.
 /// </summary>
 /// <param name="preferred">First summary in the comparison.</param>
 /// <param name="other">Second summary in the comparison.</param>
 /// <param name="treatKindly">Optional flag, false by default. NOTE(review): its meaning is not visible from this declaration - confirm against the implementations.</param>
 /// <returns>
 /// A signed integer. NOTE(review): presumably positive means the first summary should be picked
 /// and negative means the second - confirm against the implementations.
 /// </returns>
 public abstract int CompareAlignmentsWithOriginal(AlignmentSummary preferred, AlignmentSummary other, bool treatKindly = false);
        /// <summary>
        /// Decides whether a realigned read should replace the original alignment, based on
        /// mismatch counts (softclips included), indel counts and indel length.
        /// </summary>
        /// <param name="other">Summary of the realigned (candidate) alignment.</param>
        /// <param name="original">Summary of the original alignment. When null the realignment wins.</param>
        /// <param name="treatKindly">Not read by this implementation.</param>
        /// <returns>1 to pick the realignment, -1 to keep the original. This implementation never returns 0.</returns>
        public override int CompareAlignmentsWithOriginal(AlignmentSummary other, AlignmentSummary original, bool treatKindly = false)
        {
            if (original == null)
            {
                return 1;
            }

            // when realignment has zero mismatch
            if (other.NumMismatchesIncludeSoftclip == 0)
            {
                // special rule for one indel vs. one mismatch
                if (other.NumIndels == 1 && original.NumMismatchesIncludeSoftclip == 1 && original.NumIndels == 0)
                {
                    return -1;
                }

                if (original.NumIndels > 0)
                {
                    return 1;
                }

                return original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip >= 1 ? 1 : -1;
            }

            // Mismatches not changed by indel realignment.
            // Either list may be null; count zero shared mismatches in that case instead of letting
            // Intersect throw ArgumentNullException on a null argument.
            var originalMismatches  = original.MismatchesIncludeSoftclip;
            var realignedMismatches = other.MismatchesIncludeSoftclip;
            var numSharedMismatch   = originalMismatches != null && realignedMismatches != null
                ? originalMismatches.Intersect(realignedMismatches).Count()
                : 0;

            // Mismatches present only in the realignment, i.e. created by introducing the indel.
            var numCreatedMismatch  = other.NumMismatchesIncludeSoftclip - numSharedMismatch;
            var reductionInMismatch = original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip;

            // More strict with short indels
            if (other.NumIndelBases <= 3)
            {
                var shortIndelIsAcceptable = numCreatedMismatch <= MaxMismatchCreatedByIndelShortIndel &&
                                             other.NumMismatchesIncludeSoftclip <= MaxTotalMismatchShortIndel &&
                                             reductionInMismatch >= MinReductionInMismatch;
                return shortIndelIsAcceptable ? 1 : -1;
            }

            // all other cases
            if (numCreatedMismatch <= MaxMismatchCreatedByIndel && other.NumMismatchesIncludeSoftclip <= MaxTotalMismatch)
            {
                if (reductionInMismatch >= MinReductionInMismatch)
                {
                    return 1;
                }

                // give preference to long indels
                if (other.NumIndelBases - original.NumIndelBases >= 9 && reductionInMismatch >= MinReductionInMismatchPreferred)
                {
                    return 1;
                }

                // give preference to indels that have high frequency to start with
                if (other.HasHighFrequencyIndel && reductionInMismatch >= MinReductionInMismatchPreferred)
                {
                    return 1;
                }
            }

            return -1;
        }
Beispiel #18
0
        /// <summary>
        /// Tries to realign a read around candidate indels near its position and returns either the
        /// original alignment or an accepted realigned copy of it.
        /// </summary>
        /// <param name="origBamAlignment">The read as originally aligned. Returned as-is on every path that does not accept a realignment.</param>
        /// <param name="changed">False on every path in this method; the accepted path delegates the assignment to <c>AcceptRealignment</c>.</param>
        /// <param name="forcedSoftclip">Initialized to false; passed by ref to <c>HandleFailedRealignment</c> and set to true when an unsanctioned indel is softclipped out.</param>
        /// <param name="confirmed">True when the read already carries the top-ranked indel, when realignment left the read unchanged, or when a failed realignment still credits the existing matching indels.</param>
        /// <param name="sketchy">True only when a realignment is accepted and <c>realignResult.IsSketchy</c> is set.</param>
        /// <param name="selectedIndels">Only null-checked here (to decide which acceptance test applies) and forwarded to <c>AcceptRealignment</c>.</param>
        /// <param name="existingIndels">Indels already present in the read; matched against the candidate indels.</param>
        /// <param name="assumeImperfect">Not read anywhere in this method body.</param>
        /// <param name="confirmedAccepteds">Forwarded to the indel source and the outcome bookkeeping; confirmed indels are appended to it when it is non-null.</param>
        /// <param name="mateIndels">Forwarded to <c>_indelSource.GetRelevantIndels</c>.</param>
        /// <returns>The original alignment, or the value returned by <c>AcceptRealignment</c> when the realignment is accepted.</returns>
        public BamAlignment GetFinalAlignment(BamAlignment origBamAlignment, out bool changed, out bool forcedSoftclip, out bool confirmed, out bool sketchy,
                                              List <PreIndel> selectedIndels = null, List <PreIndel> existingIndels          = null,
                                              bool assumeImperfect           = true, List <HashableIndel> confirmedAccepteds = null, List <PreIndel> mateIndels = null)
        {
            sketchy        = false;
            forcedSoftclip = false;
            bool forcedAlignment = false;
            var  presumeStartPositionForForcedAlignment = 0;

            // A read without any cigar operations gets a presumed start position and is later realigned
            // as if it were a full-length match (see the forcedAlignment handling further down).
            if (origBamAlignment.CigarData.Count == 0)
            {
                // This was something weird that came up in the halo dataset... mapq is 0 but is still mapped, no cigar

                if (origBamAlignment.Position <= 0 && origBamAlignment.FragmentLength != 0) // No sense trying to fiddle with the position otherwise
                {
                    // TODO does this really even move the needle? Is it helping enough to outweigh its weirdness?
                    var presumedEndPosition = origBamAlignment.MatePosition < origBamAlignment.Position
                        ? origBamAlignment.MatePosition - origBamAlignment.FragmentLength
                        : origBamAlignment.MatePosition + origBamAlignment.FragmentLength;
                    presumeStartPositionForForcedAlignment = presumedEndPosition - origBamAlignment.Bases.Length;
                    forcedAlignment = true;
                }
                else
                {
                    presumeStartPositionForForcedAlignment = origBamAlignment.Position;
                    forcedAlignment = true;
                }
            }

            var  anyIndelsAtAll = _regionFilterer.AnyIndelsNearby(origBamAlignment.Position);
            bool isRealignable  = true;

            if (anyIndelsAtAll)
            {
                // The leading "false ||" has no effect on the result.
                var isImperfectRead = false || ((origBamAlignment.ContainsDisallowedCigarOps(_suspectCigarOps) ||
                                                 origBamAlignment.GetIntTag("NM") > 0 || forcedAlignment));
                var isReadWorthCaringAbout = !origBamAlignment.IsDuplicate() && !origBamAlignment.IsSecondary();
                // Reads made of a single repeated base are excluded as well.
                isRealignable = isImperfectRead && isReadWorthCaringAbout && origBamAlignment.Bases.Distinct().Count() > 1;
            }
            else
            {
                _statusCounter.AddStatusCount("No indels nearby at all");
                isRealignable = false;
            }

            if (!isRealignable)
            {
                confirmed = false;
                changed   = false;
                sketchy   = false;
                return(origBamAlignment);
            }

            // TODO maybe flag (or return all) if there's a lot or high quality stuff that we're missing! Esp with pair specific
            var indels = _indelSource.GetRelevantIndels(forcedAlignment ? presumeStartPositionForForcedAlignment : origBamAlignment.Position,
                                                        mateIndels, confirmedAccepteds);

            // Don't realign around single indels if we already have them
            bool          hasExistingUnsanctionedIndels = false;
            bool          existingSanctionedIndelIsBest = false;
            bool          hasVeryGoodIndel       = false;
            bool          hasHardToCallIndel     = false;
            var           existingMatches        = new List <PreIndel>();
            HashableIndel existingConfirmedIndel = new HashableIndel();
            var           existingMatchHashables = new List <HashableIndel>();

            // Split the candidate indels into those the read already carries and those it does not.
            if (indels.Any() && existingIndels != null && existingIndels.Any())
            {
                var topScore             = (float)(indels.Max(x => x.Key.Score));
                var matchesFound         = 0;
                var nonPreExistingIndels = new List <KeyValuePair <HashableIndel, GenomeSnippet> >();

                var index = 0;
                foreach (var kvp in indels)
                {
                    var indel   = kvp.Key;
                    var matches = existingIndels.Where(e => Helper.IsMatch(e, indel));
                    var isMatch = matches.Any();
                    if (isMatch)
                    {
                        matchesFound++;

                        // Only the first candidate in enumeration order (index 0) can mark the existing indel as best.
                        if (!indel.InMulti && index == 0)
                        {
                            existingSanctionedIndelIsBest = true;
                            existingConfirmedIndel        = indel;
                        }

                        var proportionOfTopScore = indel.Score / (float)topScore;
                        if (proportionOfTopScore >= 0.75)
                        {
                            hasVeryGoodIndel = true;
                        }

                        if (indel.HardToCall)
                        {
                            hasHardToCallIndel = true;
                        }

                        existingMatches.AddRange(matches);

                        // TODO do we need special handling of multis?
                        existingMatchHashables.Add(indel);
                    }

                    if (!isMatch || indel.InMulti)
                    {
                        nonPreExistingIndels.Add(kvp);
                    }


                    index++;
                }

                // TODO do we actually want to replace indels with non-pre-existing only?
                indels = nonPreExistingIndels;

                // The read has indels, but none of them matches a candidate.
                if (matchesFound == 0)
                {
                    hasExistingUnsanctionedIndels = true;
                }
            }

            // TODO this precludes us from having good multis
            if (existingSanctionedIndelIsBest)
            {
                // If it already had the top ranked indel, there's not really any point in trying to realign around others (here we assume that it's also the best fitting indel for the read, hence why it was originally called by the regular aligner).
                _statusCounter.AddStatusCount("Existing indel is already the best available");
                changed   = false;
                confirmed = true;

                UpdateOutcomeForConfirmed(existingConfirmedIndel);

                // NOTE(review): confirmedAccepteds is passed by value, so a list created here is not
                // visible to the caller - confirm whether that is intended.
                if (confirmedAccepteds == null)
                {
                    confirmedAccepteds = new List <HashableIndel>();
                }

                confirmedAccepteds.Add(existingConfirmedIndel);

                return(origBamAlignment);
            }


            // Give up when nothing is left to realign against, or when the read spans more than 500 reference positions.
            if (!indels.Any() || origBamAlignment.EndPosition - origBamAlignment.Position > 500)
            {
                if (!indels.Any())
                {
                    // TODO maybe do the forced softclip here if the read did have indels?
                    _statusCounter.AddStatusCount("No indels to realign to");
                    _statusCounter.AppendStatusStringTag("RX", $"{origBamAlignment.GetStringTag("RX")},No indels to realign to", origBamAlignment);
                }
                else
                {
                    _statusCounter.AddStatusCount("Alignment reference span longer than we can realign to");
                }
                changed   = false;
                confirmed = false;
                return(origBamAlignment);
            }



            // TODO this should relate to cap on indel size... introducing too large of an indel will make us go beyond this context.
            var context       = indels.First().Value;
            var orderedIndels = indels.Select(x => x.Key).ToList();
            var numIndels     = orderedIndels.Count;

            _statusCounter.AddStatusCount("Realigning to " + numIndels);

            // Realignment works on a copy; the original alignment object is what the non-accepting paths return.
            var bamAlignment = new BamAlignment(origBamAlignment);

            if (forcedAlignment)
            {
                // Treat the cigar-less read as a full-length match starting at the presumed position.
                bamAlignment.CigarData = new CigarAlignment(origBamAlignment.Bases.Length + "M");
                bamAlignment.Position  = presumeStartPositionForForcedAlignment;
            }

            var realignResult = _readRealigner.Realign(new Read(_chromosome, bamAlignment),
                                                       orderedIndels, indels.ToDictionary(x => x.Key, x => x.Value), confirmedAccepteds != null && confirmedAccepteds.Any());

            var acceptedIndels = realignResult?.AcceptedIndels;
            var hasAnyIndels   = acceptedIndels != null && acceptedIndels.Any();

            if (realignResult != null)
            {
                _statusCounter.AddStatusCount("Able to realign at all (may still be worse than original)");
                _statusCounter.AppendStatusStringTag("RX", "Able to realign at all(may still be worse than original)", bamAlignment);
            }
            else
            {
                _statusCounter.AddStatusCount("Not able to realign at all");
                _statusCounter.AppendStatusStringTag("RX", "Not able to realign at all", origBamAlignment);
            }

            AlignmentSummary originalAlignmentSummary = null;
            // Stays true when realignResult is null, because it is only reassigned inside the block below.
            var realignmentUnchanged = true;

            if (realignResult != null)
            {
                originalAlignmentSummary =
                    Extensions.GetAlignmentSummary((new Read(_chromosome, origBamAlignment)), context.Sequence,
                                                   _trackActualMismatches, _checkSoftclipsForMismatches, context.StartPosition);

                realignmentUnchanged = _judger.RealignmentIsUnchanged(realignResult, origBamAlignment);

                if (originalAlignmentSummary.NumMismatches > 0)
                {
                    // TODO PERF do we still want to use this ever?
                    var sumMismatch = Helper.GetSumOfMismatchQualities(origBamAlignment.Qualities,
                                                                       origBamAlignment.Bases, new Read(_chromosome, origBamAlignment).PositionMap, context.Sequence,
                                                                       context.StartPosition);
                    originalAlignmentSummary.SumOfMismatchingQualities = sumMismatch;
                }

                // Within this logic also checking the same as "!realignmentUnchanged" above.. consolidate this.
                // NOTE(review): && binds tighter than ||, so this reads as
                // (selectedIndels != null && RealignmentBetterOrEqual(...)) || ResultIsGoodEnough(...).
                // ResultIsGoodEnough is therefore also consulted when selectedIndels is non-null but the
                // first test fails - confirm that this is the intended grouping.
                if (selectedIndels != null &&
                    (_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, confirmedAccepteds != null && confirmedAccepteds.Any())) ||
                    ResultIsGoodEnough(realignResult, origBamAlignment, originalAlignmentSummary,
                                       realignmentUnchanged, confirmedAccepteds != null && confirmedAccepteds.Any()))
                {
                    UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, true, realignResult);

                    if (realignResult.IsSketchy)
                    {
                        sketchy = true;
                    }
                    return(AcceptRealignment(origBamAlignment, out changed, selectedIndels, existingIndels, realignResult, originalAlignmentSummary, bamAlignment, hasExistingUnsanctionedIndels, out confirmed));
                }
            }


            // At this point, any good realignment would have been returned. If it's realigned and changed now, it's an unaccepted (not good enough) realignment.
            // If it had an indel to begin with, it's basically a vote that we don't trust that indel. Optionally softclip it out.

            if (!realignmentUnchanged)
            {
                changed   = false;
                confirmed = false;

                HandleFailedRealignment(origBamAlignment, ref forcedSoftclip, existingIndels, realignResult, hasExistingUnsanctionedIndels, existingMatches);

                if ((hasVeryGoodIndel || (hasHardToCallIndel && _judger.IsVeryConfident(originalAlignmentSummary))) && !hasExistingUnsanctionedIndels && existingMatchHashables.Any())
                {
                    // It didn't have the tip-top indel, but it had one that was very close, and we tried realigning around the top guys and failed - this one looks better. Give it credit.
                    confirmed = true;
                    foreach (var indel in existingMatchHashables)
                    {
                        UpdateOutcomeForConfirmed(indel);

                        if (confirmedAccepteds != null)
                        {
                            confirmedAccepteds.Add(indel);
                        }
                    }
                }
                UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, false, realignResult);
            }
            else
            {
                // Also reached when realignResult is null (realignmentUnchanged keeps its initial value),
                // which is why the accesses below use the null-conditional operator.
                if (acceptedIndels != null)
                {
                    foreach (var indelNum in acceptedIndels)
                    {
                        var indel = orderedIndels[indelNum];

                        UpdateOutcomeForConfirmed(indel);
                    }
                }

                _statusCounter.AddStatusCount("INDEL STATUS\tUnchanged\t" + realignResult?.Indels);
                _statusCounter.AppendStatusStringTag("RX", "Unchanged: " + realignResult?.Indels, origBamAlignment);

                confirmed = true;
                changed   = false;
                return(origBamAlignment);
            }

            // NOTE(review): this block looks unreachable. A null realignResult leaves realignmentUnchanged
            // at true, so the else-branch above has already returned before control gets here - confirm
            // whether the softclipping below was meant to run for reads that could not be realigned.
            if (realignResult == null)
            {
                if (_softclipUnknownIndels && hasExistingUnsanctionedIndels)
                {
                    var unsanctioned = existingIndels.Where(x => !existingMatches.Contains(x));

                    foreach (var preIndel in unsanctioned.OrderBy(x => x.ReferencePosition))
                    {
                        // Use the smaller of the two anchors as the clip length; reverseClip is set when the left anchor is the smaller one.
                        var reverseClip = false;
                        var clipLength  = preIndel.RightAnchor;
                        if (preIndel.LeftAnchor < preIndel.RightAnchor)
                        {
                            reverseClip = true;
                            clipLength  = preIndel.LeftAnchor;
                        }

                        // TODO arbitrary number here...
                        // If it's pretty well-anchored, don't remove the indel
                        if (clipLength > 20)
                        {
                            continue;
                        }

                        forcedSoftclip = true;
                        _statusCounter.AddStatusCount("Softclipped out bad indel");
                        _statusCounter.AppendStatusStringTag("RX",
                                                             $"Softclipped out bad indel({origBamAlignment.CigarData},{string.Join(",", existingIndels)}... No realignment",
                                                             origBamAlignment);
                        _statusCounter.AddStatusCount("INDEL STATUS\tRemoved\t" + string.Join("|", existingIndels));
                        OverlappingIndelHelpers.SoftclipAfterIndel(origBamAlignment,
                                                                   reverseClip, preIndel.ReferencePosition);
                    }
                }
            }

            _statusCounter.AppendStatusStringTag("RX", "Realignment failed", origBamAlignment);
            _statusCounter.AddStatusCount("Realignment failed");

            return(origBamAlignment);
        }
 /// <summary>
 /// Reports whether an alignment summary has an anchor longer than 10 and at most one mismatch.
 /// </summary>
 /// <param name="realignResult">The alignment summary to inspect.</param>
 /// <returns>True when <c>AnchorLength</c> is greater than 10 and <c>NumMismatches</c> is at most 1.</returns>
 public bool IsVeryConfident(AlignmentSummary realignResult)
 {
     // The mismatch count is only consulted once the anchor requirement is met.
     if (realignResult.AnchorLength > 10)
     {
         return realignResult.NumMismatches <= 1;
     }

     return false;
 }
 /// <summary>
 /// Asks the configured alignment comparer whether the realignment is at least as good as the original.
 /// </summary>
 /// <param name="realignResult">The realignment outcome, passed as the comparer's first argument.</param>
 /// <param name="originalAlignmentSummary">Summary of the original alignment, passed as the second argument.</param>
 /// <param name="isPairAware">Passed through as the comparer's third argument.</param>
 /// <returns>True when the comparer's result is zero or positive.</returns>
 public bool RealignmentBetterOrEqual(RealignmentResult realignResult,
                                      AlignmentSummary originalAlignmentSummary, bool isPairAware)
 {
     var verdict = _alignmentComparer.CompareAlignmentsWithOriginal(realignResult, originalAlignmentSummary, isPairAware);

     return verdict >= 0;
 }
Beispiel #21
0
 /// <summary>
 /// Compares two alignment summaries and returns a signed integer verdict.
 /// </summary>
 /// <param name="preferred">First summary in the comparison.</param>
 /// <param name="other">Second summary in the comparison.</param>
 /// <returns>
 /// A signed integer. NOTE(review): presumably positive favours the first argument, negative
 /// favours the second and zero is a tie - confirm against the implementations.
 /// </returns>
 public abstract int CompareAlignments(AlignmentSummary preferred, AlignmentSummary other);
        /// <summary>
        /// Decides whether a realigned read should replace the original alignment by applying a
        /// sequence of heuristic rules in order. The first rule that fires determines the result.
        /// </summary>
        /// <param name="other">Summary of the realigned (candidate) alignment.</param>
        /// <param name="original">Summary of the original alignment. When null the realignment wins.</param>
        /// <returns>
        /// 1 to pick the realignment, -1 to keep the original. When no rule fires, the result is the
        /// negated value of <c>CompareAlignments(original, other)</c>.
        /// </returns>
        public int CompareAlignmentsWithOriginal2(AlignmentSummary other, AlignmentSummary original)
        {
            // Nothing to compare against.
            if (original == null)
            {
                return 1;
            }

            var realignedMismatches = other.NumMismatches;
            var originalMismatches  = original.NumMismatches;

            // Looks a lot worse
            if (realignedMismatches > originalMismatches + 3)
            {
                return -1;
            }

            // Haven't moved the needle much, and for a short indel(s) that probably would have been called originally.
            var realignedEditTotal = realignedMismatches + other.NumSoftclips + other.NumIndelBases;
            var originalEditTotal  = originalMismatches + original.NumSoftclips + original.NumIndelBases;
            if (realignedEditTotal == originalEditTotal && other.NumDeletedBases < 3 && other.NumInsertedBases == 0)
            {
                return -1;
            }

            // TODO consider re-instating?
            // Short edge insertion should not be allowed if it doesn't make the read any better (TODO play with this. commenting out for now til I give it more thought.)
            //if (other.AnchorLength == 0 && other.NumIndels == 1 && other.NumInsertedBases <= 2 &&
            //    original.NumMismatchesIncludeSoftclip < other.NumInsertedBases)
            //{
            //    return -1;
            //}

            // TODO maybe tighter restrictions if stuff is not anchored.

            var realignedMismatchesWithSoftclip = other.NumMismatchesIncludeSoftclip;
            var originalMismatchesWithSoftclip  = original.NumMismatchesIncludeSoftclip;
            // How many mismatches (softclips included) the realignment removed; negative when it added some.
            var mismatchReduction = originalMismatchesWithSoftclip - realignedMismatchesWithSoftclip;

            if (realignedMismatchesWithSoftclip == 0)
            {
                // Special rule for one single-base indel vs. one mismatch: keep the mismatch.
                var singleBaseIndelVersusSingleMismatch = other.NumIndels == 1 && other.NumIndelBases == 1 &&
                                                          originalMismatchesWithSoftclip == 1 && original.NumIndels == 0;
                if (singleBaseIndelVersusSingleMismatch)
                {
                    return -1;
                }

                // A clean realignment wins if the original had indels or had any mismatch at all.
                return original.NumIndels > 0 || mismatchReduction >= 1 ? 1 : -1;
            }

            // Be nice to large indels, if they fit well in the new read and the old read was messy to begin with.
            // It has to actually look at least a little better, though.
            if (originalMismatches > 2 &&
                realignedMismatches - originalMismatches <= 2 &&
                other.NumIndels - original.NumIndels <= 2 &&
                other.NumIndelBases > 10 &&
                (realignedMismatches < originalMismatches ||
                 realignedMismatchesWithSoftclip < (originalMismatchesWithSoftclip * 0.9) ||
                 other.NumSoftclips < original.NumSoftclips))
            {
                return 1;
            }

            // Short indel introduced where there were a lot of softclips and didn't improve a lot
            if (other.NumIndelBases <= 2 &&
                other.NumIndelBases > original.NumIndelBases &&
                realignedMismatches >= originalMismatches - 1 &&
                originalMismatchesWithSoftclip > 10 &&
                ((!_trustSoftclips && original.NumSoftclips * 0.8 <= other.NumSoftclips) ||
                 mismatchReduction <= originalMismatchesWithSoftclip / 5))
            {
                return -1;
            }

            // If original had tons of mismatches/softclips, and realign is better but only a little, this may just be chance (ex: polyT) -> don't accept realignment
            if (originalMismatchesWithSoftclip > 10 && mismatchReduction <= originalMismatchesWithSoftclip / 10)
            {
                return -1;
            }

            // Super long original softclip and num mismatches
            // Better be a lot shorter softclip and not add mismatches, or have a bunch more matches from softclips being unmasked.
            // ?Need to have added at least 1 match for every 2 softclips removed.
            // TODO un-magic these numbers
            const int numSoftclipsToBeConsideredSuperLong = 20;
            if (original.NumSoftclips > numSoftclipsToBeConsideredSuperLong &&
                ((other.NumSoftclips / (float)original.NumSoftclips >= 0.75 && realignedMismatches >= originalMismatches) ||
                 (other.NumMatches - original.NumMatches) < (original.NumSoftclips - other.NumSoftclips) / 2f))
            {
                return -1;
            }

            // Really doesn't look better
            if (originalMismatches - realignedMismatches <= 0 &&
                other.NumMatches - original.NumMatches <= 2 &&
                other.NumIndels >= original.NumIndels &&
                mismatchReduction <= 2)
            {
                return -1;
            }

            // More mismatches, barely anchored, and not clearly better even when discounting the original's softclip mismatches.
            const double benefitOfDoubtForOrigScMismatches = 0.75;
            if (realignedMismatches > originalMismatches &&
                realignedMismatchesWithSoftclip > (originalMismatchesWithSoftclip * benefitOfDoubtForOrigScMismatches) &&
                other.AnchorLength < 3)
            {
                return -1;
            }

            const int maxMismatchesNotShared       = 2;
            const int minReductionInMismatches     = 1;
            const int minReductionForShortIndels   = 2;

            // Number of mismatches present in both alignments.
            int sharedMismatches;
            if (!_trackActualMismatches)
            {
                // Use an approximation if we don't want to do the whole thing
                sharedMismatches = Math.Min(originalMismatchesWithSoftclip, realignedMismatchesWithSoftclip);
            }
            else if (original.MismatchesIncludeSoftclip == null || other.MismatchesIncludeSoftclip == null)
            {
                sharedMismatches = 0;
            }
            else
            {
                sharedMismatches = original.MismatchesIncludeSoftclip.Intersect(other.MismatchesIncludeSoftclip).Count();
            }

            var mismatchesNotShared = realignedMismatchesWithSoftclip - sharedMismatches;

            // Be more wary of shorter indels
            if (other.NumIndelBases <= 3 && (original.NumIndelBases == 0 || original.NumIndelBases > 3))
            {
                // The only mismatches in the new one are shared, and it has clearly fewer mismatches overall...
                var cleanAndClearlyBetter = mismatchesNotShared == 0 && mismatchReduction >= minReductionForShortIndels;
                // ...or it is at most one mismatch worse than the original.
                var atMostOneWorse = realignedMismatchesWithSoftclip - originalMismatchesWithSoftclip <= 1;

                return cleanAndClearlyBetter || atMostOneWorse ? 1 : -1;
            }

            // Most of the mismatches are shared and the realignment has fewer mismatches than the original.
            if (mismatchesNotShared <= maxMismatchesNotShared && mismatchReduction >= minReductionInMismatches)
            {
                return 1;
            }

            // No rule fired: fall back to the general comparison, flipping its sign because it is called with the original first.
            return -1 * CompareAlignments(original, other);
        }
Beispiel #23
0
 /// <summary>
 /// Asks the configured alignment comparer whether the realignment is at least as good as the original.
 /// </summary>
 /// <param name="realignResult">The realignment outcome, passed as the comparer's first argument.</param>
 /// <param name="originalAlignmentSummary">Summary of the original alignment, passed as the second argument.</param>
 /// <returns>True when the comparer's result is zero or positive.</returns>
 private bool RealignmentBetterOrEqual(RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary)
 {
     var verdict = _alignmentComparer.CompareAlignmentsWithOriginal(realignResult, originalAlignmentSummary);

     return verdict >= 0;
 }
        /// <summary>
        /// Compares two alignment summaries by applying a sequence of rules in order.
        /// The first rule that fires determines the result.
        /// </summary>
        /// <param name="original">First alignment summary.</param>
        /// <param name="other">Second alignment summary. When null, <paramref name="original"/> wins.</param>
        /// <returns>1 when <paramref name="original"/> is favoured, -1 when <paramref name="other"/> is favoured, 0 when no rule separates them.</returns>
        public override int CompareAlignments(AlignmentSummary original, AlignmentSummary other)
        {
            if (other == null)
            {
                return 1;
            }

            // Original was much better, or original has a single mismatch and no indels while the other needs several indels.
            if (other.NumMismatches > original.NumMismatches + 3 ||
                (original.NumMismatches == 1 && original.NumIndels == 0 && other.NumIndels > 1))
            {
                return 1;
            }

            // Mirror image of the single-mismatch rule above.
            if (other.NumMismatches == 1 && other.NumIndels == 0 && original.NumIndels > 1)
            {
                return -1;
            }

            var originalMismatchesWithSoftclip = original.NumMismatchesIncludeSoftclip;
            var otherMismatchesWithSoftclip    = other.NumMismatchesIncludeSoftclip;

            // Original wasn't that bad, and it's better than new
            var originalDecentAndBetter = originalMismatchesWithSoftclip < 5 &&
                                          originalMismatchesWithSoftclip < otherMismatchesWithSoftclip;
            // Original was bad, but is reasonably better than new
            var originalBadButClearlyBetter = originalMismatchesWithSoftclip >= 5 &&
                                              originalMismatchesWithSoftclip < otherMismatchesWithSoftclip * 0.8;
            if (originalDecentAndBetter || originalBadButClearlyBetter)
            {
                return 1;
            }

            // New is reasonably better than original
            if (originalMismatchesWithSoftclip > otherMismatchesWithSoftclip + 1)
            {
                return -1;
            }

            if (original.NumIndelBases == other.NumIndelBases)
            {
                // Same number of indel bases: favour the side that explains them with a single indel and few mismatches.
                if (original.NumIndels == 1 && other.NumIndels > 1 && original.NumMismatches <= 2)
                {
                    return 1;
                }

                if (other.NumIndels == 1 && original.NumIndels > 1 && other.NumMismatches <= 2)
                {
                    return -1;
                }

                var bothHaveOneToFiveMismatches = original.NumMismatches > 0 && other.NumMismatches > 0 &&
                                                  original.NumMismatches <= 5 && other.NumMismatches <= 5;
                if (bothHaveOneToFiveMismatches)
                {
                    // Rather have extra mismatches be low quality, as it's more likely they are illegitimate and this is in the right place
                    if (original.SumOfMismatchingQualities <= other.SumOfMismatchingQualities)
                    {
                        return 1;
                    }
                    else if (original.SumOfMismatchingQualities > other.SumOfMismatchingQualities)
                    {
                        return -1;
                    }
                }
            }

            // The other alignment is mismatch-free while the original is not.
            if (originalMismatchesWithSoftclip > 0 && otherMismatchesWithSoftclip == 0)
            {
                return -1;
            }

            // Last resort: fewer indels wins; equal counts are a tie.
            return original.NumIndels < other.NumIndels
                ? 1
                : (original.NumIndels > other.NumIndels ? -1 : 0);
        }
Beispiel #25
0
        /// <summary>
        /// Walks the CIGAR operations of an alignment and tallies soft-clips, matches, mismatches
        /// and indels against the reference into a new <see cref="AlignmentSummary"/>.
        /// </summary>
        /// <param name="startIndexInReference">Index into <paramref name="refSequence"/> at which the first non-soft-clip operation begins.</param>
        /// <param name="cigarData">CIGAR operations to walk, in order.</param>
        /// <param name="refSequence">Reference bases, indexed in the same coordinates as <paramref name="startIndexInReference"/>.</param>
        /// <param name="readSequence">Read bases; index 0 corresponds to the first base consumed by the CIGAR.</param>
        /// <param name="trackActualMismatches">When true, every mismatch found against an in-range reference base is also recorded as "refIndex_refBase_readBase" in <c>MismatchesIncludeSoftclip</c>.</param>
        /// <param name="checkSoftclipsForMismatches">When true, non-N soft-clipped bases are compared to the reference as though they were aligned.</param>
        /// <param name="probeSoftclipPrefix">Not read by this method.</param>
        /// <param name="probeSoftclipSuffix">Not read by this method.</param>
        /// <returns>The populated summary, or <c>null</c> when an 'M' operation runs past the end of <paramref name="refSequence"/>.</returns>
        /// <exception cref="InvalidDataException">An 'M' operation covers a negative reference index.</exception>
        public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
        {
            var summary = new AlignmentSummary();

            summary.Cigar = cigarData;

            if (checkSoftclipsForMismatches)
            {
                // Soft-clipped prefix bases get compared too, so begin the walk that many positions earlier.
                startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
            }

            var startIndexInRead = 0;
            var anchorLength     = 0;   // run of non-mismatching 'M' bases before the first mismatch/indel
            var endAnchorLength  = 0;   // run of non-mismatching 'M' bases since the last mismatch/indel
            var hasHitNonMatch   = false;

            for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
            {
                var operation = cigarData[cigarOpIndex];
                var opLength  = (int)operation.Length;
                switch (operation.Type)
                {
                case 'S':     // soft-clip
                    for (var i = 0; i < opLength; i++)
                    {
                        summary.NumSoftclips++;

                        if (readSequence[startIndexInRead + i] != 'N')
                        {
                            summary.NumNonNSoftclips++;

                            if (checkSoftclipsForMismatches)
                            {
                                if (startIndexInReference + i < 0 ||
                                    startIndexInReference + i >= refSequence.Length)
                                {
                                    // A clipped base hanging off either end of the reference counts as a mismatch,
                                    // but there is no reference base to record alongside it.
                                    summary.NumMismatchesIncludeSoftclip++;
                                }
                                else if (readSequence[startIndexInRead + i] !=
                                         refSequence[startIndexInReference + i])
                                {
                                    summary.NumMismatchesIncludeSoftclip++;

                                    if (trackActualMismatches)
                                    {
                                        if (summary.MismatchesIncludeSoftclip == null)
                                        {
                                            summary.MismatchesIncludeSoftclip = new List<string>();
                                        }

                                        var mismatch = string.Format("{0}_{1}_{2}",
                                                                     startIndexInReference + i,
                                                                     refSequence[startIndexInReference + i],
                                                                     readSequence[startIndexInRead + i]);
                                        summary.MismatchesIncludeSoftclip.Add(mismatch);
                                    }
                                }
                            }
                        }
                    }
                    break;

                case 'M':     // match or mismatch
                    for (var i = 0; i < opLength; i++)
                    {
                        if (startIndexInReference + i > refSequence.Length - 1)
                        {
                            // Read goes off the end of the reference: signalled to the caller as "no summary".
                            return(null);
                        }

                        if (startIndexInReference + i < 0)
                        {
                            // Fail with a descriptive error instead of a raw IndexOutOfRangeException (or, for an
                            // 'N' read base, silently treating the position as part of the anchor).
                            throw new InvalidDataException(
                                      "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                                      cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                        }

                        var baseAtIndex = readSequence[startIndexInRead + i];
                        if (baseAtIndex != 'N' && baseAtIndex !=
                            refSequence[startIndexInReference + i])
                        {
                            summary.NumMismatches++;
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                var mismatch = string.Format("{0}_{1}_{2}", startIndexInReference + i,
                                                             refSequence[startIndexInReference + i], readSequence[startIndexInRead + i]);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }

                            hasHitNonMatch  = true;
                            endAnchorLength = 0;
                        }
                        else
                        {
                            // An 'N' read base is neither a match nor a mismatch, but it does extend the anchors.
                            if (baseAtIndex != 'N')
                            {
                                summary.NumMatches++;
                            }

                            if (!hasHitNonMatch)
                            {
                                anchorLength++;
                            }
                            endAnchorLength++;
                        }
                    }
                    break;

                case 'I':     // insertion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases    += opLength;
                    summary.NumInsertedBases += opLength;
                    break;

                case 'D':     // deletion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases   += opLength;
                    summary.NumDeletedBases += opLength;
                    break;
                }


                if (operation.IsReadSpan())
                {
                    startIndexInRead += opLength;
                }

                if (operation.IsReferenceSpan())
                {
                    startIndexInReference += opLength;
                }
                if (checkSoftclipsForMismatches && operation.Type == 'S')
                {
                    // Soft-clips were compared against the reference above, so they advance the reference cursor as well.
                    startIndexInReference += opLength;
                }
            }

            // The anchor is the shorter of the leading and trailing clean stretches.
            summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);

            return(summary);
        }
        /// <summary>
        /// Walks the CIGAR operations of an alignment and tallies soft-clips, matches, mismatches
        /// and indels against the reference into a new <see cref="AlignmentSummary"/>.
        /// </summary>
        /// <param name="startIndexInReference">Index into <paramref name="refSequence"/> at which the first non-soft-clip operation begins.</param>
        /// <param name="cigarData">CIGAR operations to walk, in order.</param>
        /// <param name="refSequence">Reference bases, indexed in the same coordinates as <paramref name="startIndexInReference"/>.</param>
        /// <param name="readSequence">Read bases; index 0 corresponds to the first base consumed by the CIGAR.</param>
        /// <param name="trackActualMismatches">When true, every mismatch found against an in-range reference base is also recorded as "refIndex_refBase_readBase" in <c>MismatchesIncludeSoftclip</c>.</param>
        /// <param name="checkSoftclipsForMismatches">When true, soft-clipped bases are compared to the reference as though they were aligned.</param>
        /// <param name="probeSoftclipPrefix">Not read by this method.</param>
        /// <param name="probeSoftclipSuffix">Not read by this method.</param>
        /// <returns>The populated summary, or <c>null</c> when an 'M' operation runs past the end of <paramref name="refSequence"/>.</returns>
        /// <exception cref="InvalidDataException">An 'M' operation covers a negative reference index.</exception>
        public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
        {
            var summary = new AlignmentSummary();

            summary.Cigar = cigarData;

            if (checkSoftclipsForMismatches)
            {
                // Soft-clipped prefix bases get compared too, so begin the walk that many positions earlier.
                startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
            }

            var startIndexInRead   = 0;
            var anchorLength       = 0;   // run of non-mismatching 'M' bases before the first mismatch/indel
            var endAnchorLength    = 0;   // run of non-mismatching 'M' bases since the last mismatch/indel
            var hasHitNonMatch     = false;
            var hasHitNonNSoftclip = false;

            for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
            {
                var operation = cigarData[cigarOpIndex];
                var opLength  = (int)(operation.Length);
                switch (operation.Type)
                {
                case 'S':     // soft-clip
                    for (var i = 0; i < opLength; i++)
                    {
                        summary.NumSoftclips++;

                        // No special treatment for Ns that are inside the softclip. Because the whole N-softclip distinction was meant to deal with padding-type softclips, I think.
                        // Once any non-N clipped base has been seen, every later clipped base (N or not) is counted as non-N.
                        if (readSequence[startIndexInRead + i] != 'N' || hasHitNonNSoftclip)
                        {
                            hasHitNonNSoftclip = true;

                            summary.NumNonNSoftclips++;

                            if (checkSoftclipsForMismatches)
                            {
                                if (startIndexInReference + i < 0 ||
                                    startIndexInReference + i >= refSequence.Length)
                                {
                                    // A clipped base hanging off either end of the reference counts as a mismatch,
                                    // but there is no reference base to record alongside it.
                                    summary.NumMismatchesIncludeSoftclip++;
                                }
                                else if (readSequence[startIndexInRead + i] !=
                                         refSequence[startIndexInReference + i] && readSequence[startIndexInRead + i] != 'N')
                                {
                                    summary.NumMismatchesIncludeSoftclip++;

                                    if (trackActualMismatches)
                                    {
                                        if (summary.MismatchesIncludeSoftclip == null)
                                        {
                                            summary.MismatchesIncludeSoftclip = new List<string>();
                                        }

                                        // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                                        var mismatch = string.Format("{0}_{1}_{2}",
                                                                     startIndexInReference + i,
                                                                     refSequence[startIndexInReference + i],
                                                                     readSequence[startIndexInRead + i]);
                                        summary.MismatchesIncludeSoftclip.Add(mismatch);
                                    }
                                }
                            }
                        }
                    }
                    break;

                case 'M':     // match or mismatch
                    for (var i = 0; i < opLength; i++)
                    {
                        if (startIndexInReference + i > refSequence.Length - 1)
                        {
                            // Read goes off the end of the reference: signalled to the caller as "no summary".
                            return(null);
                        }

                        if (startIndexInReference + i < 0)
                        {
                            throw new InvalidDataException(
                                      "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                                      cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                        }

                        var baseAtIndex = readSequence[startIndexInRead + i];
                        if (baseAtIndex != 'N' && baseAtIndex !=
                            refSequence[startIndexInReference + i])
                        {
                            summary.NumMismatches++;
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                                var mismatch = string.Format("{0}_{1}_{2}", startIndexInReference + i,
                                                             refSequence[startIndexInReference + i], readSequence[startIndexInRead + i]);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }

                            hasHitNonMatch  = true;
                            endAnchorLength = 0;
                        }
                        else
                        {
                            // An 'N' read base is neither a match nor a mismatch, but it does extend the anchors.
                            if (baseAtIndex != 'N')
                            {
                                summary.NumMatches++;
                            }

                            if (!hasHitNonMatch)
                            {
                                anchorLength++;
                            }
                            endAnchorLength++;
                        }
                    }
                    break;

                case 'I':     // insertion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases    += opLength;
                    summary.NumInsertedBases += opLength;
                    break;

                case 'D':     // deletion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases   += opLength;
                    summary.NumDeletedBases += opLength;
                    break;
                }


                if (operation.IsReadSpan())
                {
                    startIndexInRead += opLength;
                }

                if (operation.IsReferenceSpan())
                {
                    startIndexInReference += opLength;
                }
                if (checkSoftclipsForMismatches && operation.Type == 'S')
                {
                    // Soft-clips were compared against the reference above, so they advance the reference cursor as well.
                    startIndexInReference += opLength;
                }
            }

            // The anchor is the shorter of the leading and trailing clean stretches.
            summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);

            return(summary);
        }