Пример #1
0
        /// <summary>
        /// Checks whether the bases on both sides of a deletion site in a read each spell out
        /// the prior indel's repeat unit.
        /// </summary>
        /// <param name="readSequence">Bases of the read being examined.</param>
        /// <param name="priorIndel">Indel whose <c>RepeatUnit</c> is compared against each flank.</param>
        /// <param name="deletionPositionInRead">
        /// Index into <paramref name="readSequence"/> of the deletion site. The left flank is the
        /// run of repeat-unit-length bases ending at this index; the right flank is the run of the
        /// same length starting at the next index.
        /// </param>
        /// <returns>
        /// <c>true</c> only when both flanks could be extracted and both equal the repeat unit.
        /// </returns>
        public static bool RepeatDeletionFlankedByRepeats(string readSequence, HashableIndel priorIndel, int deletionPositionInRead)
        {
            var repeatUnit       = priorIndel.RepeatUnit;
            var repeatUnitLength = repeatUnit.Length;

            var leftIsSketchy = false;
            if (deletionPositionInRead >= repeatUnitLength)
            {
                var leftFlankingBases =
                    readSequence.Substring(deletionPositionInRead + 1 - repeatUnitLength, repeatUnitLength);
                leftIsSketchy = leftFlankingBases == repeatUnit;
            }

            // The right flank starts one base past the deletion site, so the number of bases
            // available is measured from that start. Measuring from the site itself let
            // Substring run one base past the end of the read and throw when the site was
            // exactly repeatUnitLength bases from the end.
            var rightFlankStart = deletionPositionInRead + 1;
            var rightIsSketchy  = false;
            if (readSequence.Length - rightFlankStart >= repeatUnitLength)
            {
                var rightFlankingBases = readSequence.Substring(rightFlankStart, repeatUnitLength);
                rightIsSketchy = rightFlankingBases == repeatUnit;
            }

            return leftIsSketchy && rightIsSketchy;
        }
Пример #2
0
        /// <summary>
        /// Builds a new <c>HashableIndel</c> carrying the same property values as
        /// <paramref name="indel1"/>, with its <c>StringRepresentation</c> regenerated via
        /// <c>HashableToString</c>.
        /// </summary>
        /// <param name="indel1">Indel to copy from.</param>
        /// <param name="otherIndel">
        /// Optional replacement for <c>OtherIndel</c>. When non-empty it is used instead of the
        /// source value and <c>InMulti</c> is set to <c>true</c> on the copy.
        /// </param>
        /// <returns>The newly created copy.</returns>
        public static HashableIndel CopyHashable(HashableIndel indel1, string otherIndel = null)
        {
            var partnerSupplied = !string.IsNullOrEmpty(otherIndel);

            var copy = new HashableIndel
            {
                AllowMismatchingInsertions = indel1.AllowMismatchingInsertions,
                AlternateAllele = indel1.AlternateAllele,
                Chromosome = indel1.Chromosome,
                InMulti = partnerSupplied || indel1.InMulti,
                IsDuplication = indel1.IsDuplication,
                IsRepeat = indel1.IsRepeat,
                IsUntrustworthyInRepeatRegion = indel1.IsUntrustworthyInRepeatRegion,
                Length = indel1.Length,
                NumBasesInReferenceSuffixBeforeUnique = indel1.NumBasesInReferenceSuffixBeforeUnique,
                ReferencePosition = indel1.ReferencePosition,
                StringRepresentation = HashableToString(indel1),
                Score = indel1.Score,
                Type = indel1.Type,
                RefPrefix = indel1.RefPrefix,
                RefSuffix = indel1.RefSuffix,
                OtherIndel = partnerSupplied ? otherIndel : indel1.OtherIndel,
                ReferenceAllele = indel1.ReferenceAllele,
                RepeatUnit = indel1.RepeatUnit,
                NumRepeatsNearby = indel1.NumRepeatsNearby,
                NumApproxDupsLeft = indel1.NumApproxDupsLeft,
                NumApproxDupsRight = indel1.NumApproxDupsRight
            };

            return copy;
        }
        /// <summary>
        /// Runs <c>VerifyIndelOutcomesFile</c> first against an empty outcome table and then
        /// against a table holding two indels.
        /// </summary>
        public void WriteIndelOutcomesFile()
        {
            // Empty table. The second argument is presumably the expected line count - confirm
            // against VerifyIndelOutcomesFile.
            VerifyIndelOutcomesFile(new ConcurrentDictionary<HashableIndel, int[]>(), 1);

            var indelAt100 = new HashableIndel
            {
                ReferencePosition = 100,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr1"
            };
            var indelAt1000 = new HashableIndel
            {
                ReferencePosition = 1000,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr1"
            };

            // Two tracked indels, each with the same six counters.
            var outcomes = new ConcurrentDictionary<HashableIndel, int[]>();
            outcomes[indelAt100] = new[] { 0, 1, 2, 3, 4, 5 };
            outcomes[indelAt1000] = new[] { 0, 1, 2, 3, 4, 5 };

            VerifyIndelOutcomesFile(outcomes, 3);
        }
Пример #4
0
 /// <summary>
 /// Evaluate insertions at read ends to determine if they are partial or unanchored.
 /// </summary>
 /// <param name="cigar">CIGAR operation found at the read end; only type 'I' is evaluated.</param>
 /// <param name="indel">Indel target that is being realigned against.</param>
 /// <param name="minimumUnanchoredInsertionLength">
 /// Applies to the indel target that is being realigned against: a target shorter than this is
 /// reported as unanchored.
 /// </param>
 /// <param name="maskPartialInsertion">
 /// When true, an observed insertion shorter than the target is reported as partial.
 /// </param>
 /// <returns>
 /// <c>true</c> when the operation is an insertion that is partial or unanchored;
 /// <c>false</c> for every non-insertion operation.
 /// </returns>
 public static bool EvaluateInsertionAtReadEnds(CigarOp cigar, HashableIndel indel, int minimumUnanchoredInsertionLength, bool maskPartialInsertion)
 {
     if (cigar.Type != 'I')
     {
         return false;
     }

     var observedIsShorterThanTarget = maskPartialInsertion && cigar.Length < indel.Length;

     // TODO is this really the right move? Why not count this against the observation rather than the expected?
     var targetIsTooShort = indel.Length < minimumUnanchoredInsertionLength;

     return observedIsShorterThanTarget || targetIsTooShort;
 }
Пример #5
0
        /// <summary>
        /// Decides whether two indels may be applied together: both must be flagged
        /// <c>InMulti</c>, and the first indel's <c>OtherIndel</c> must equal the second
        /// indel's <c>StringRepresentation</c>.
        /// </summary>
        /// <param name="indel1">Indel whose <c>OtherIndel</c> names its partner.</param>
        /// <param name="indel2">Candidate partner.</param>
        /// <param name="pairSpecific">Not read by this implementation.</param>
        /// <returns><c>true</c> when the two indels are recorded as a pair.</returns>
        public bool CanCoexist(HashableIndel indel1, HashableIndel indel2, bool pairSpecific = true)
        {
            // TODO do we really need to allow for a scenario where we let stuff coexist even though we've never seen it before? If so, need to revisit overlapping indel logic ie chr22:24037625 T>TCTGTTG,chr22:24037625 TCTG>T should not be allowed
            var bothAreInMulti = indel1.InMulti && indel2.InMulti;

            return bothAreInMulti && indel1.OtherIndel == indel2.StringRepresentation;
        }
Пример #6
0
        /// <summary>
        /// Bumps the outcome counters at slots 2, 3 and 5 for the given indel, creating its
        /// eight-slot counter array in <c>_indelOutcomes</c> on first use.
        /// </summary>
        /// <param name="existingConfirmedIndel">Indel whose counters are updated.</param>
        private void UpdateOutcomeForConfirmed(HashableIndel existingConfirmedIndel)
        {
            if (!_indelOutcomes.TryGetValue(existingConfirmedIndel, out var counters))
            {
                // Slot order noted by the original author:
                // success, failure, Rank, numIndels, multis, confirmed
                // NOTE(review): eight slots are allocated but only six are named - confirm the rest.
                counters = new int[8];
                _indelOutcomes.Add(existingConfirmedIndel, counters);
            }

            counters[2] += 1;
            counters[3] += 1;
            counters[5] += 1;
            // TODO this doesn't handle multis at all.
        }
Пример #7
0
        /// <summary>
        /// Orders two indels by reference position, placing insertions ahead of
        /// non-insertions when the positions are equal.
        /// </summary>
        /// <param name="c1">First indel.</param>
        /// <param name="c2">Second indel.</param>
        /// <returns>
        /// A negative value when <paramref name="c1"/> sorts first, a positive value when
        /// <paramref name="c2"/> sorts first, and zero when they share a position and are
        /// either both insertions or both non-insertions.
        /// </returns>
        public int CompareSimple(HashableIndel c1, HashableIndel c2)
        {
            var coordinateResult = c1.ReferencePosition.CompareTo(c2.ReferencePosition);
            if (coordinateResult != 0)
            {
                return coordinateResult;
            }

            var firstIsInsertion  = c1.Type == AlleleCategory.Insertion;
            var secondIsInsertion = c2.Type == AlleleCategory.Insertion;

            // A comparison must be antisymmetric and return 0 for equivalent items. Looking
            // only at c1 returned -1 in both directions for two insertions (and 1 in both
            // directions for two non-insertions), which sort routines can reject or
            // mis-order.
            if (firstIsInsertion == secondIsInsertion)
            {
                return 0;
            }

            return firstIsInsertion ? -1 : 1;  // return insertions first
        }
Пример #8
0
        /// <summary>
        /// Tells whether two indels describe the same event: same chromosome and reference
        /// position, then either matching inserted sequence (as judged by
        /// <c>InsertionsAreMatch</c>) or equal reference-allele length.
        /// </summary>
        /// <param name="hashable1">First indel; its <c>Type</c> selects which allele check is used.</param>
        /// <param name="hashable2">Second indel.</param>
        /// <returns><c>true</c> when position and alleles are considered equivalent.</returns>
        public static bool IsMatch(HashableIndel hashable1, HashableIndel hashable2)
        {
            if (hashable1.Chromosome != hashable2.Chromosome ||
                hashable1.ReferencePosition != hashable2.ReferencePosition)
            {
                return false;
            }

            if (hashable1.Type == AlleleCategory.Insertion)
            {
                return InsertionsAreMatch(hashable1.AlternateAllele, hashable2.AlternateAllele);
            }

            // Anything that is not an insertion is matched purely on reference-allele length.
            return hashable1.ReferenceAllele.Length == hashable2.ReferenceAllele.Length;
        }
        /// <summary>
        /// Runs <c>VerifyWriteIndelsFile</c> first against an empty indel table and then
        /// against a table holding a single indel.
        /// </summary>
        public void WriteIndelsFile()
        {
            // Empty table. The second argument is presumably the expected line count - confirm
            // against VerifyWriteIndelsFile.
            VerifyWriteIndelsFile(new ConcurrentDictionary<HashableIndel, int>(), 1);

            var indelAt100 = new HashableIndel
            {
                ReferencePosition = 100,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr1"
            };

            var indelCounts = new ConcurrentDictionary<HashableIndel, int>();
            indelCounts[indelAt100] = 10;

            VerifyWriteIndelsFile(indelCounts, 2);
        }
Пример #10
0
        /// <summary>
        /// Tells whether a <c>PreIndel</c> and a <c>HashableIndel</c> describe the same event:
        /// same chromosome and reference position, then either matching inserted sequence (as
        /// judged by <c>InsertionsAreMatch</c>) or equal reference-allele length.
        /// </summary>
        /// <param name="pre">Indel to look up; its <c>Type</c> selects which allele check is used.</param>
        /// <param name="hashable">Candidate indel to compare against.</param>
        /// <returns><c>true</c> when position and alleles are considered equivalent.</returns>
        public static bool IsMatch(PreIndel pre, HashableIndel hashable)
        {
            if (pre.Chromosome != hashable.Chromosome ||
                pre.ReferencePosition != hashable.ReferencePosition)
            {
                return false;
            }

            if (pre.Type == AlleleCategory.Insertion)
            {
                return InsertionsAreMatch(pre.AlternateAllele, hashable.AlternateAllele);
            }

            // Anything that is not an insertion is matched purely on reference-allele length.
            return pre.ReferenceAllele.Length == hashable.ReferenceAllele.Length;
        }
Пример #11
0
        /// <summary>
        /// Exercises <c>Helper.RepeatDeletionFlankedByRepeats</c> with a single-base "A" repeat
        /// unit and the deletion site fixed at read index 2.
        /// </summary>
        public void RepeatDeletionFlankedByRepeats()
        {
            const int deletionSite = 2;

            var deletionOfRepeatedA = new HashableIndel
            {
                AlternateAllele = "T",
                ReferenceAllele = "TAAAA",
                IsRepeat = true,
                RepeatUnit = "A"
            };

            // Base at the site is "A" and so is the next base: flanked on both sides.
            Assert.True(Helper.RepeatDeletionFlankedByRepeats("CCAAA", deletionOfRepeatedA, deletionSite));
            Assert.True(Helper.RepeatDeletionFlankedByRepeats("AAAAA", deletionOfRepeatedA, deletionSite));

            // At least one of the two compared bases is not "A".
            Assert.False(Helper.RepeatDeletionFlankedByRepeats("TTATA", deletionOfRepeatedA, deletionSite));
            Assert.False(Helper.RepeatDeletionFlankedByRepeats("TTTAA", deletionOfRepeatedA, deletionSite));
        }
Пример #12
0
        /// <summary>
        /// Converts a <c>PreIndel</c> into a <c>HashableIndel</c>, deriving type and length from
        /// the allele lengths, and returns it through <c>Helper.CopyHashable</c>.
        /// </summary>
        /// <param name="preIndel">Source indel.</param>
        /// <param name="score">Score to record on the result.</param>
        /// <returns>The copy produced by <c>Helper.CopyHashable</c>.</returns>
        private static HashableIndel GetHashableIndel(PreIndel preIndel, int score = 0)
        {
            var referenceLength = preIndel.ReferenceAllele.Length;
            var alternateLength = preIndel.AlternateAllele.Length;

            // A longer reference allele means bases were removed; everything else, including
            // equal lengths, is classified as an insertion.
            var category = referenceLength > alternateLength
                ? AlleleCategory.Deletion
                : AlleleCategory.Insertion;

            var seed = new HashableIndel
            {
                Chromosome = preIndel.Chromosome,
                ReferencePosition = preIndel.ReferencePosition,
                ReferenceAllele = preIndel.ReferenceAllele,
                AlternateAllele = preIndel.AlternateAllele,
                Type = category,
                Length = Math.Abs(referenceLength - alternateLength),
                Score = score,
                InMulti = preIndel.InMulti,
                OtherIndel = preIndel.OtherIndel
            };

            return Helper.CopyHashable(seed);
        }
Пример #13
0
        /// <summary>
        /// Decides whether a read keeps its original alignment or is replaced by a realignment
        /// around candidate indels, and reports the decision through the out flags.
        /// </summary>
        /// <param name="origBamAlignment">The alignment under evaluation. It is returned as-is on every path except the accepted-realignment path.</param>
        /// <param name="changed">Set to false on every path in this method; the accepted-realignment path delegates the value to <c>AcceptRealignment</c>.</param>
        /// <param name="forcedSoftclip">Starts false; passed by reference to <c>HandleFailedRealignment</c> and set to true by the softclip loop near the end of the method.</param>
        /// <param name="confirmed">Set to true when an existing indel is the top candidate, when the realignment is unchanged, or when a failed realignment leaves a sufficiently good existing match; otherwise false (or whatever <c>AcceptRealignment</c> assigns).</param>
        /// <param name="sketchy">Set to true only when an accepted realignment result has <c>IsSketchy</c> set.</param>
        /// <param name="selectedIndels">Forwarded to <c>AcceptRealignment</c>; a non-null value also enables the <c>RealignmentBetterOrEqual</c> acceptance test.</param>
        /// <param name="existingIndels">Presumably the indels already present in the read - confirm against callers. Candidates matching these are treated as pre-existing.</param>
        /// <param name="assumeImperfect">Not read anywhere in this method body.</param>
        /// <param name="confirmedAccepteds">Passed to the indel source and judger; confirmed indels are appended to it when it is non-null.</param>
        /// <param name="mateIndels">Forwarded to <c>_indelSource.GetRelevantIndels</c>.</param>
        /// <returns>Either <paramref name="origBamAlignment"/> or the alignment returned by <c>AcceptRealignment</c>.</returns>
        public BamAlignment GetFinalAlignment(BamAlignment origBamAlignment, out bool changed, out bool forcedSoftclip, out bool confirmed, out bool sketchy,
                                              List <PreIndel> selectedIndels = null, List <PreIndel> existingIndels          = null,
                                              bool assumeImperfect           = true, List <HashableIndel> confirmedAccepteds = null, List <PreIndel> mateIndels = null)
        {
            sketchy        = false;
            forcedSoftclip = false;
            bool forcedAlignment = false;
            var  presumeStartPositionForForcedAlignment = 0;

            // A read with an empty CIGAR gets a presumed start position; further down it is given
            // an all-match CIGAR at that position before realignment is attempted.
            if (origBamAlignment.CigarData.Count == 0)
            {
                // This was something weird that came up in the halo dataset... mapq is 0 but is still mapped, no cigar

                if (origBamAlignment.Position <= 0 && origBamAlignment.FragmentLength != 0) // No sense trying to fiddle with the position otherwise
                {
                    // TODO does this really even move the needle? Is it helping enough to outweigh its weirdness?
                    var presumedEndPosition = origBamAlignment.MatePosition < origBamAlignment.Position
                        ? origBamAlignment.MatePosition - origBamAlignment.FragmentLength
                        : origBamAlignment.MatePosition + origBamAlignment.FragmentLength;
                    presumeStartPositionForForcedAlignment = presumedEndPosition - origBamAlignment.Bases.Length;
                    forcedAlignment = true;
                }
                else
                {
                    presumeStartPositionForForcedAlignment = origBamAlignment.Position;
                    forcedAlignment = true;
                }
            }

            // NOTE(review): this check uses the original Position even when a presumed start was
            // computed above, while GetRelevantIndels below uses the presumed start - confirm.
            var  anyIndelsAtAll = _regionFilterer.AnyIndelsNearby(origBamAlignment.Position);
            bool isRealignable  = true;

            if (anyIndelsAtAll)
            {
                // The leading "false ||" has no effect on the result.
                var isImperfectRead = false || ((origBamAlignment.ContainsDisallowedCigarOps(_suspectCigarOps) ||
                                                 origBamAlignment.GetIntTag("NM") > 0 || forcedAlignment));
                var isReadWorthCaringAbout = !origBamAlignment.IsDuplicate() && !origBamAlignment.IsSecondary();
                // Reads made of a single distinct base are never realigned.
                isRealignable = isImperfectRead && isReadWorthCaringAbout && origBamAlignment.Bases.Distinct().Count() > 1;
            }
            else
            {
                _statusCounter.AddStatusCount("No indels nearby at all");
                isRealignable = false;
            }

            if (!isRealignable)
            {
                confirmed = false;
                changed   = false;
                sketchy   = false;
                return(origBamAlignment);
            }

            // TODO maybe flag (or return all) if there's a lot or high quality stuff that we're missing! Esp with pair specific
            var indels = _indelSource.GetRelevantIndels(forcedAlignment ? presumeStartPositionForForcedAlignment : origBamAlignment.Position,
                                                        mateIndels, confirmedAccepteds);

            // Don't realign around single indels if we already have them
            bool          hasExistingUnsanctionedIndels = false;
            bool          existingSanctionedIndelIsBest = false;
            bool          hasVeryGoodIndel       = false;
            bool          hasHardToCallIndel     = false;
            var           existingMatches        = new List <PreIndel>();
            HashableIndel existingConfirmedIndel = new HashableIndel();
            var           existingMatchHashables = new List <HashableIndel>();

            // Split the candidates into those the read already carries (matches against
            // existingIndels) and those it does not; only the latter, plus any InMulti
            // candidates, remain in "indels" afterwards.
            if (indels.Any() && existingIndels != null && existingIndels.Any())
            {
                var topScore             = (float)(indels.Max(x => x.Key.Score));
                var matchesFound         = 0;
                var nonPreExistingIndels = new List <KeyValuePair <HashableIndel, GenomeSnippet> >();

                var index = 0;
                foreach (var kvp in indels)
                {
                    var indel   = kvp.Key;
                    var matches = existingIndels.Where(e => Helper.IsMatch(e, indel));
                    var isMatch = matches.Any();
                    if (isMatch)
                    {
                        matchesFound++;

                        // Only the first candidate in iteration order can be "the best";
                        // presumably the source returns candidates best-first - confirm.
                        if (!indel.InMulti && index == 0)
                        {
                            existingSanctionedIndelIsBest = true;
                            existingConfirmedIndel        = indel;
                        }

                        var proportionOfTopScore = indel.Score / (float)topScore;
                        if (proportionOfTopScore >= 0.75)
                        {
                            hasVeryGoodIndel = true;
                        }

                        if (indel.HardToCall)
                        {
                            hasHardToCallIndel = true;
                        }

                        existingMatches.AddRange(matches);

                        // TODO do we need special handling of multis?
                        existingMatchHashables.Add(indel);
                    }

                    if (!isMatch || indel.InMulti)
                    {
                        nonPreExistingIndels.Add(kvp);
                    }


                    index++;
                }

                // TODO do we actually want to replace indels with non-pre-existing only?
                indels = nonPreExistingIndels;

                // The flag is raised only when no candidate matched any existing indel.
                if (matchesFound == 0)
                {
                    hasExistingUnsanctionedIndels = true;
                }
            }

            // TODO this precludes us from having good multis
            if (existingSanctionedIndelIsBest)
            {
                // If it already had the top ranked indel, there's not really any point in trying to realign around others (here we assume that it's also the best fitting indel for the read, hence why it was originally called by the regular aligner).
                _statusCounter.AddStatusCount("Existing indel is already the best available");
                changed   = false;
                confirmed = true;

                UpdateOutcomeForConfirmed(existingConfirmedIndel);

                // NOTE(review): confirmedAccepteds is a by-value parameter, so a list created
                // here is not visible to a caller that passed null.
                if (confirmedAccepteds == null)
                {
                    confirmedAccepteds = new List <HashableIndel>();
                }

                confirmedAccepteds.Add(existingConfirmedIndel);

                return(origBamAlignment);
            }


            // Give up when nothing is left to realign against, or when the alignment spans
            // more than 500 reference positions.
            if (!indels.Any() || origBamAlignment.EndPosition - origBamAlignment.Position > 500)
            {
                if (!indels.Any())
                {
                    // TODO maybe do the forced softclip here if the read did have indels?
                    _statusCounter.AddStatusCount("No indels to realign to");
                    _statusCounter.AppendStatusStringTag("RX", $"{origBamAlignment.GetStringTag("RX")},No indels to realign to", origBamAlignment);
                }
                else
                {
                    _statusCounter.AddStatusCount("Alignment reference span longer than we can realign to");
                }
                changed   = false;
                confirmed = false;
                return(origBamAlignment);
            }



            // TODO this should relate to cap on indel size... introducing too large of an indel will make us go beyond this context.
            var context       = indels.First().Value;
            var orderedIndels = indels.Select(x => x.Key).ToList();
            var numIndels     = orderedIndels.Count;

            _statusCounter.AddStatusCount("Realigning to " + numIndels);

            // Realignment works on a copy so the original alignment stays available.
            var bamAlignment = new BamAlignment(origBamAlignment);

            if (forcedAlignment)
            {
                bamAlignment.CigarData = new CigarAlignment(origBamAlignment.Bases.Length + "M");
                bamAlignment.Position  = presumeStartPositionForForcedAlignment;
            }

            var realignResult = _readRealigner.Realign(new Read(_chromosome, bamAlignment),
                                                       orderedIndels, indels.ToDictionary(x => x.Key, x => x.Value), confirmedAccepteds != null && confirmedAccepteds.Any());

            var acceptedIndels = realignResult?.AcceptedIndels;
            var hasAnyIndels   = acceptedIndels != null && acceptedIndels.Any();

            if (realignResult != null)
            {
                _statusCounter.AddStatusCount("Able to realign at all (may still be worse than original)");
                _statusCounter.AppendStatusStringTag("RX", "Able to realign at all(may still be worse than original)", bamAlignment);
            }
            else
            {
                _statusCounter.AddStatusCount("Not able to realign at all");
                _statusCounter.AppendStatusStringTag("RX", "Not able to realign at all", origBamAlignment);
            }

            AlignmentSummary originalAlignmentSummary = null;
            // Stays true when realignResult is null, because it is only reassigned inside the
            // non-null branch below.
            var realignmentUnchanged = true;

            if (realignResult != null)
            {
                originalAlignmentSummary =
                    Extensions.GetAlignmentSummary((new Read(_chromosome, origBamAlignment)), context.Sequence,
                                                   _trackActualMismatches, _checkSoftclipsForMismatches, context.StartPosition);

                realignmentUnchanged = _judger.RealignmentIsUnchanged(realignResult, origBamAlignment);

                if (originalAlignmentSummary.NumMismatches > 0)
                {
                    // TODO PERF do we still want to use this ever?
                    var sumMismatch = Helper.GetSumOfMismatchQualities(origBamAlignment.Qualities,
                                                                       origBamAlignment.Bases, new Read(_chromosome, origBamAlignment).PositionMap, context.Sequence,
                                                                       context.StartPosition);
                    originalAlignmentSummary.SumOfMismatchingQualities = sumMismatch;
                }

                // Within this logic also checking the same as "!realignmentUnchanged" above.. consolidate this.
                // NOTE(review): && binds tighter than ||, so the selectedIndels null check guards
                // only the RealignmentBetterOrEqual test; ResultIsGoodEnough is evaluated
                // regardless - confirm this grouping is intended.
                if (selectedIndels != null &&
                    (_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, confirmedAccepteds != null && confirmedAccepteds.Any())) ||
                    ResultIsGoodEnough(realignResult, origBamAlignment, originalAlignmentSummary,
                                       realignmentUnchanged, confirmedAccepteds != null && confirmedAccepteds.Any()))
                {
                    UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, true, realignResult);

                    if (realignResult.IsSketchy)
                    {
                        sketchy = true;
                    }
                    return(AcceptRealignment(origBamAlignment, out changed, selectedIndels, existingIndels, realignResult, originalAlignmentSummary, bamAlignment, hasExistingUnsanctionedIndels, out confirmed));
                }
            }


            // At this point, any good realignment would have been returned. If it's realigned and changed now, it's an unaccepted (not good enough) realignment.
            // If it had an indel to begin with, it's basically a vote that we don't trust that indel. Optionally softclip it out.

            if (!realignmentUnchanged)
            {
                changed   = false;
                confirmed = false;

                HandleFailedRealignment(origBamAlignment, ref forcedSoftclip, existingIndels, realignResult, hasExistingUnsanctionedIndels, existingMatches);

                if ((hasVeryGoodIndel || (hasHardToCallIndel && _judger.IsVeryConfident(originalAlignmentSummary))) && !hasExistingUnsanctionedIndels && existingMatchHashables.Any())
                {
                    // It didn't have the tip-top indel, but it had one that was very close, and we tried realigning around the top guys and failed - this one looks better. Give it credit.
                    confirmed = true;
                    foreach (var indel in existingMatchHashables)
                    {
                        UpdateOutcomeForConfirmed(indel);

                        if (confirmedAccepteds != null)
                        {
                            confirmedAccepteds.Add(indel);
                        }
                    }
                }
                UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, false, realignResult);
            }
            else
            {
                // Reached both when the realignment equals the original and when no
                // realignment result was produced at all (realignResult == null).
                if (acceptedIndels != null)
                {
                    foreach (var indelNum in acceptedIndels)
                    {
                        var indel = orderedIndels[indelNum];

                        UpdateOutcomeForConfirmed(indel);
                    }
                }

                _statusCounter.AddStatusCount("INDEL STATUS\tUnchanged\t" + realignResult?.Indels);
                _statusCounter.AppendStatusStringTag("RX", "Unchanged: " + realignResult?.Indels, origBamAlignment);

                confirmed = true;
                changed   = false;
                return(origBamAlignment);
            }

            // NOTE(review): this block appears unreachable. A null realignResult leaves
            // realignmentUnchanged at true, so the else branch above has already returned.
            if (realignResult == null)
            {
                if (_softclipUnknownIndels && hasExistingUnsanctionedIndels)
                {
                    var unsanctioned = existingIndels.Where(x => !existingMatches.Contains(x));

                    foreach (var preIndel in unsanctioned.OrderBy(x => x.ReferencePosition))
                    {
                        // Clip toward whichever anchor is shorter.
                        var reverseClip = false;
                        var clipLength  = preIndel.RightAnchor;
                        if (preIndel.LeftAnchor < preIndel.RightAnchor)
                        {
                            reverseClip = true;
                            clipLength  = preIndel.LeftAnchor;
                        }

                        // TODO arbitrary number here...
                        // If it's pretty well-anchored, don't remove the indel
                        if (clipLength > 20)
                        {
                            continue;
                        }

                        forcedSoftclip = true;
                        _statusCounter.AddStatusCount("Softclipped out bad indel");
                        _statusCounter.AppendStatusStringTag("RX",
                                                             $"Softclipped out bad indel({origBamAlignment.CigarData},{string.Join(",", existingIndels)}... No realignment",
                                                             origBamAlignment);
                        _statusCounter.AddStatusCount("INDEL STATUS\tRemoved\t" + string.Join("|", existingIndels));
                        OverlappingIndelHelpers.SoftclipAfterIndel(origBamAlignment,
                                                                   reverseClip, preIndel.ReferencePosition);
                    }
                }
            }

            _statusCounter.AppendStatusStringTag("RX", "Realignment failed", origBamAlignment);
            _statusCounter.AddStatusCount("Realignment failed");

            return(origBamAlignment);
        }
Пример #14
0
 /// <summary>
 /// Formats an indel as "chromosome:position reference>alternate".
 /// </summary>
 /// <param name="indel">Indel to format.</param>
 /// <returns>The formatted text.</returns>
 public static string HashableToString(HashableIndel indel)
 {
     return $"{indel.Chromosome}:{indel.ReferencePosition} {indel.ReferenceAllele}>{indel.AlternateAllele}";
 }
Пример #15
0
        /// <summary>
        /// Reports whether at least one side of a deletion site in a read was scanned and
        /// every chunk scanned on that side equalled the prior indel's repeat unit.
        /// </summary>
        /// <param name="readSequence">Bases of the read being examined.</param>
        /// <param name="priorIndel">Indel whose <c>RepeatUnit</c> is compared against the read.</param>
        /// <param name="deletionPositionInRead">Index into <paramref name="readSequence"/> of the deletion site; both scans start one base after it.</param>
        /// <returns>
        /// <c>true</c> when the left scan or the right scan compared at least one chunk and
        /// found no chunk that differed from the repeat unit.
        /// </returns>
        public static bool DeletionHasSketchyAnchor(string readSequence, HashableIndel priorIndel, int deletionPositionInRead)
        {
            var scanStart = deletionPositionInRead + 1;

            // Right side: step forward one repeat unit at a time to the end of the read. The
            // last chunk may be shorter than the unit and is compared against the unit's prefix.
            var rightWasScanned  = false;
            var rightIsAllRepeat = true;
            var offset = scanStart;
            while (offset < readSequence.Length)
            {
                rightWasScanned = true;

                var unit      = priorIndel.RepeatUnit;
                var remaining = readSequence.Length - offset;
                var expected  = remaining < unit.Length ? unit.Substring(0, remaining) : unit;

                // TODO go back and get this logic from Hubble?
                if (readSequence.Substring(offset, expected.Length) != expected)
                {
                    // TODO PERF can we break here to save time?
                    rightIsAllRepeat = false;
                }

                offset += unit.Length;
            }

            // Left side: step backward one repeat unit at a time while the cursor is at least 1.
            // Each chunk is read starting one base before the cursor; a cursor closer to the
            // read start than one unit is compared against the unit's suffix.
            var leftWasScanned  = false;
            var leftIsAllRepeat = true;
            for (var cursor = scanStart; cursor >= 1; cursor -= priorIndel.RepeatUnit.Length)
            {
                var unit = priorIndel.RepeatUnit;

                // Skip cursors that sit within one unit of the end of the read.
                if (cursor + unit.Length >= readSequence.Length)
                {
                    continue;
                }

                leftWasScanned = true;

                var expected = cursor < unit.Length ? unit.Substring(unit.Length - cursor) : unit;

                // TODO go back and get this logic from Hubble?
                if (readSequence.Substring(cursor - 1, expected.Length) != expected)
                {
                    // TODO PERF can we break here to save time?
                    leftIsAllRepeat = false;
                }
            }

            return (leftWasScanned && leftIsAllRepeat) || (rightWasScanned && rightIsAllRepeat);
        }
Пример #16
0
        /// <summary>
        /// Checks how many candidate indels <c>ChromosomeIndelSource.GetRelevantIndels</c>
        /// returns for a range of query positions.
        /// </summary>
        public void GetRelevantIndels()
        {
            // Every candidate is a chr1 insertion with reference allele "A"; only the alternate
            // allele, length, position and score vary.
            HashableIndel MakeInsertion(string alternateAllele, int length, int referencePosition, int score)
            {
                return new HashableIndel()
                {
                    AlternateAllele = alternateAllele,
                    ReferenceAllele = "A",
                    Chromosome = "chr1",
                    Length = length,
                    ReferencePosition = referencePosition,
                    Score = score,
                    Type = AlleleCategory.Insertion
                };
            }

            var candidates = new List<HashableIndel>()
            {
                MakeInsertion("AG", 1, 10002, 1),    // indel
                MakeInsertion("AGT", 2, 10002, 10),  // indel2
                MakeInsertion("AGT", 2, 8002, 10),   // position way lower
                MakeInsertion("AGT", 2, 9800, 10),   // likely a different block, but within range
                MakeInsertion("AGT", 2, 21000, 10),  // position way higher
                MakeInsertion("AGT", 2, 10251, 10),  // border case high
                MakeInsertion("AGT", 2, 9752, 10),   // border case low
                MakeInsertion("AGT", 2, 0, 10)       // indel at 0
            };

            // Any snippet request is answered with the same 2000-base stretch of 'A'.
            var snippetSource = new Mock<IGenomeSnippetSource>();
            var cannedSnippet = new GenomeSnippet()
            {
                Chromosome = "chr1",
                Sequence = new string('A', 2000),
                StartPosition = 1
            };
            snippetSource.Setup(s => s.GetGenomeSnippet(It.IsAny<int>())).Returns(cannedSnippet);

            var indelSource = new ChromosomeIndelSource(candidates, snippetSource.Object);

            //var relevant = indelSource.GetRelevantIndels(100);
            //Assert.Equal(4, relevant.Count);

            // Should get indel1 and 2, border high, border low, withinrange
            Assert.Equal(5, indelSource.GetRelevantIndels(10002).Count());

            // Should get indel1 and 2, border low, within range, but not border high (now > 250 away)
            Assert.Equal(4, indelSource.GetRelevantIndels(10000).Count());

            // Should get all 5 as 10002 did, showing that it is 250 inclusive
            Assert.Equal(5, indelSource.GetRelevantIndels(10001).Count());

            // Should get the 9752 and the 9800
            Assert.Equal(2, indelSource.GetRelevantIndels(9700).Count());

            // Not close enough to anything
            Assert.Equal(0.0, indelSource.GetRelevantIndels(9500).Count());

            // Only the indel at position 0 is expected here
            Assert.Equal(1.0, indelSource.GetRelevantIndels(0).Count());

            // Far beyond every candidate
            Assert.Equal(0.0, indelSource.GetRelevantIndels(100000).Count());
        }
Пример #17
0
        /// <summary>
        /// Converts <paramref name="preIndel"/> into a <see cref="HashableIndel"/>, deriving its alleles from the
        /// reference snippet and annotating it with repeat / duplication information.
        /// </summary>
        /// <param name="snippet">Reference chunk; indexes into its Sequence are computed relative to its StartPosition.</param>
        /// <param name="preIndel">Candidate indel to convert.</param>
        /// <param name="contextStart">Forwarded unchanged to the allele and reference prefix/suffix helpers.</param>
        /// <param name="debug">When true, detected duplications and repeats are written to the console.</param>
        /// <returns>The annotated indel, after being passed through Helper.CopyHashable.</returns>
        public static HashableIndel GetHashableIndel(GenomeSnippet snippet, PreIndel preIndel, int contextStart, bool debug)
        {
            // A reference chunk counts as an (approximate) copy of the inserted bases when it is
            // identical or differs by at most one mismatching base.
            bool IsApproximateCopy(string referenceChunk, string insertedBases)
            {
                return referenceChunk == insertedBases ||
                       Helper.GetHammingNumMismatches(referenceChunk, insertedBases) <= 1;
            }

            var actualReferenceAllele = ActualReferenceAllele(snippet, preIndel, contextStart);
            var actualAltAllele       = ActualAltAllele(preIndel, actualReferenceAllele);

            // Anything whose reference allele is not strictly longer than the alt is treated as an insertion.
            var isInsertion = actualReferenceAllele.Length <= actualAltAllele.Length;
            var indelType   = isInsertion ? AlleleCategory.Insertion : AlleleCategory.Deletion;

            // The inserted/deleted bases are the longer allele minus its first base.
            var variantBases = (isInsertion ? actualAltAllele : actualReferenceAllele).Substring(1);

            const int maxRepeatUnitLength = 3;
            string    repeatUnit;
            var       isRepeat = StitchingLogic.OverlapEvaluator.IsRepeat(variantBases, maxRepeatUnitLength, out repeatUnit);

            // Note: evaluated with the repeat call from IsRepeat, before the RMxN check below can override it.
            var isDuplication = Helper.IsDuplication(snippet.Sequence, preIndel.ReferencePosition, isRepeat, repeatUnit, actualAltAllele);

            var approxDupsLeft  = 0;
            var approxDupsRight = 0;

            if (isInsertion && preIndel.Length > 3)
            {
                var unitLength         = preIndel.Length;
                var indelIndexInSnippet = preIndel.ReferencePosition - snippet.StartPosition;

                // Need to go both directions because we're allowing inexact.
                // Rightwards: step one insertion-length at a time while the reference keeps matching.
                // TODO < or <=
                for (var pos = indelIndexInSnippet; snippet.Sequence.Length > pos + unitLength; pos += unitLength)
                {
                    if (!IsApproximateCopy(snippet.Sequence.Substring(pos, unitLength), variantBases))
                    {
                        break;
                    }
                    approxDupsRight++;
                }

                // Leftwards: same walk, starting one insertion-length before the indel.
                for (var pos = indelIndexInSnippet - unitLength; pos >= 0; pos -= unitLength)
                {
                    if (!IsApproximateCopy(snippet.Sequence.Substring(pos, unitLength), variantBases))
                    {
                        break;
                    }
                    approxDupsLeft++;
                }
            }

            string newRepeatUnit;
            var    repeats = Helper.ComputeRMxNLengthForIndel(preIndel.ReferencePosition - snippet.StartPosition, variantBases, snippet.Sequence, 6, out newRepeatUnit);

            if (repeats >= 6) // TODO make this configurable?
            {
                isRepeat   = true;
                repeatUnit = newRepeatUnit;
            }

            // For multi-indels, describe the partner indel using alleles derived the same way as above.
            var otherIndel = "";
            if (preIndel.InMulti)
            {
                var partner = GetIndelKey(preIndel.OtherIndel);
                partner.ReferenceAllele = ActualReferenceAllele(snippet, partner, contextStart);
                partner.AlternateAllele = ActualAltAllele(partner, partner.ReferenceAllele);
                otherIndel = Helper.CandidateToString(partner);
            }

            var length = Math.Abs(actualReferenceAllele.Length - actualAltAllele.Length);

            // Only single-base indels are checked for sitting in a homopolymer stretch.
            var isUntrustworthyInRepeatRegion =
                length == 1 && Helper.IsInHomopolymerStretch(snippet.Sequence, preIndel.ReferencePosition);

            // TODO ADD TESTS!!
            var refPrefix = ReferencePrefix(snippet, preIndel, contextStart);
            var refSuffix = ReferenceSuffix(snippet, preIndel, contextStart);

            //Read-end repeats of this repeat unit that are this length or smaller should not be trusted as insertion evidence, but larger ones can
            var numBasesBeforeInsertionUnique = 0;

            if (isInsertion)
            {
                var sequenceToCheckFor = isRepeat ? repeatUnit : actualAltAllele;
                var step   = sequenceToCheckFor.Length;
                var offset = 0;

                // Count consecutive leading copies of the sequence in the reference suffix.
                while (offset < refSuffix.Length - step && refSuffix.Substring(offset, step) == sequenceToCheckFor)
                {
                    numBasesBeforeInsertionUnique++;
                    offset += step;
                }
            }

            var indelIdentifier = Helper.CopyHashable(new HashableIndel
            {
                Chromosome        = preIndel.Chromosome,
                ReferencePosition = preIndel.ReferencePosition,
                ReferenceAllele   = actualReferenceAllele,
                AlternateAllele   = actualAltAllele,
                Type          = indelType,
                Length        = length,
                Score         = preIndel.Score,
                InMulti       = preIndel.InMulti,
                OtherIndel    = otherIndel,
                IsRepeat      = isRepeat,
                RepeatUnit    = repeatUnit,
                IsDuplication = isDuplication,
                IsUntrustworthyInRepeatRegion = isUntrustworthyInRepeatRegion,
                RefPrefix = refPrefix,
                RefSuffix = refSuffix,
                NumBasesInReferenceSuffixBeforeUnique = numBasesBeforeInsertionUnique,
                NumRepeatsNearby   = repeats,
                NumApproxDupsLeft  = approxDupsLeft,
                NumApproxDupsRight = approxDupsRight
            }, otherIndel);

            if (debug)
            {
                if (isDuplication)
                {
                    Console.WriteLine($"Found a duplication: {indelIdentifier.StringRepresentation}");
                }

                if (isRepeat)
                {
                    Console.WriteLine($"Found a repeat: {indelIdentifier.StringRepresentation}, {repeatUnit}");
                }
            }

            return indelIdentifier;
        }
 // True when the hashable indel is part of a multi-indel and its recorded partner
 // (OtherIndel) has the same string form as the supplied candidate.
 private bool IsMultiMatch(HashableIndel hashable, PreIndel indel)
 {
     // TODO shouldn't this also check the normal indel?
     if (!hashable.InMulti)
     {
         return false;
     }

     var candidateKey = Helper.CandidateToString(indel);
     return candidateKey == hashable.OtherIndel;
 }
Пример #19
0
 // Returns the reference snippet registered for the given indel; the dictionary indexer
 // throws if the indel has no entry.
 private GenomeSnippet GetContext(HashableIndel indel, Dictionary <HashableIndel, GenomeSnippet> indelContexts)
     => indelContexts[indel];
Пример #20
0
 // Uses the indel's precomputed StringRepresentation as its textual form.
 private string StringifyIndel(HashableIndel indel) => indel.StringRepresentation;
Пример #21
0
        /// <summary>
        /// Tries to introduce <paramref name="priorIndel"/> into the read described by <paramref name="positionMap"/>
        /// and, when it fits and passes the anchor / sequence checks, builds the resulting realignment.
        /// </summary>
        /// <param name="readSequence">Bases of the read being realigned.</param>
        /// <param name="priorIndel">Candidate insertion or deletion to place in the read.</param>
        /// <param name="refSequence">Reference sequence handed to the alignment-summary calculation.</param>
        /// <param name="anchorLeft">When true the position map is scanned from index 0 forward; otherwise from the last index backward.</param>
        /// <param name="positionMap">
        /// Reference position per read base (-1 marks inserted bases). It is updated in place while the indel is
        /// placed, and those updates happen before the later checks, so it may already be changed when null is returned.
        /// </param>
        /// <param name="refSequenceStartIndex">Subtracted from the read's zero-based reference start to get its index within <paramref name="refSequence"/>.</param>
        /// <param name="pairSpecific">When true, a sketchy anchor only marks the result as sketchy instead of rejecting it.</param>
        /// <returns>The realignment result, or null when the indel cannot be placed or fails any of the checks.</returns>
        /// <exception cref="Exception">The computed anchor length is not smaller than the read length.</exception>
        /// <exception cref="InvalidDataException">The updated position map has no mappable bases.</exception>
        private RealignmentResult AddIndelAndGetResult(string readSequence, HashableIndel priorIndel,
                                                       string refSequence, bool anchorLeft, PositionMap positionMap, int refSequenceStartIndex, bool pairSpecific)
        {
            var  foundIndel = false;
            var  insertionPostionInReadStart     = -1;
            var  insertionPositionInReadEnd      = -1;
            var  deletionPositionInRead          = -1;
            bool anyPositionsAfterDeletionMapped = false;

            // TODO PERF can we bail out early if it's not possible that the indel could be inserted in the read, based on position?

            if (anchorLeft)
            {
                // move along position map to see if we can insert indel
                for (var i = 0; i < positionMap.Length; i++)
                {
                    if (positionMap.GetPositionAtIndex(i) == priorIndel.ReferencePosition && i != positionMap.Length - 1)  // make sure we don't end right before indel
                    {
                        foundIndel = true;

                        if (priorIndel.Type == AlleleCategory.Insertion)
                        {
                            insertionPostionInReadStart = i + 1;

                            // stick in -1 for insertion length, then adjust positions after
                            for (var j = i + 1; j < positionMap.Length; j++)
                            {
                                if (j - i <= priorIndel.Length)
                                {
                                    positionMap.UpdatePositionAtIndex(j, -1, true);
                                    // The insertion ends after its full length, or at the read end if the read runs out first.
                                    if (j - i == priorIndel.Length || j == positionMap.Length - 1)
                                    {
                                        insertionPositionInReadEnd = j;
                                    }
                                }
                                else
                                {
                                    if (positionMap.GetPositionAtIndex(j) != -1) // preserve existing insertions
                                    {
                                        positionMap.UpdatePositionAtIndex(j, positionMap.GetPositionAtIndex(j) - priorIndel.Length);
                                    }
                                }
                            }
                            break;
                        }

                        if (priorIndel.Type == AlleleCategory.Deletion)
                        {
                            deletionPositionInRead = i;
                            // offset positions after deletion
                            for (var j = i + 1; j < positionMap.Length; j++)
                            {
                                if (positionMap.GetPositionAtIndex(j) != -1) // preserve existing insertions
                                {
                                    anyPositionsAfterDeletionMapped = true;
                                    positionMap.UpdatePositionAtIndex(j, positionMap.GetPositionAtIndex(j) + priorIndel.Length);
                                }
                            }
                            break;
                        }
                    }
                }
            }
            else
            {
                // walk backwards along position map to see if we can insert indel
                if (priorIndel.Type == AlleleCategory.Insertion)
                {
                    for (var i = positionMap.Length - 1; i >= 0; i--)
                    {
                        // Either the base mapped just after the indel position is found (the insertion ends right before it),
                        // or the base at the indel position itself is found (the insertion ends on it).
                        if (positionMap.GetPositionAtIndex(i) == priorIndel.ReferencePosition + 1 && i != 0)
                        {
                            foundIndel = true;
                            insertionPositionInReadEnd = i - 1;
                        }
                        else if (positionMap.GetPositionAtIndex(i) == priorIndel.ReferencePosition && i != positionMap.Length - 1)
                        {
                            foundIndel = true;
                            insertionPositionInReadEnd = i;
                        }

                        if (foundIndel)
                        {
                            // stick in -1 for insertion length, then adjust positions
                            for (var j = insertionPositionInReadEnd; j >= 0; j--)
                            {
                                if (insertionPositionInReadEnd - j + 1 <= priorIndel.Length)
                                {
                                    positionMap.UpdatePositionAtIndex(j, -1, true);
                                    // The insertion starts after its full length, or at the read start if the read runs out first.
                                    if (insertionPositionInReadEnd - j + 1 == priorIndel.Length || j == 0)
                                    {
                                        insertionPostionInReadStart = j;
                                    }
                                }
                                else
                                {
                                    if (positionMap.GetPositionAtIndex(j) != -1) // Don't update position map for things that were already -1
                                    {
                                        positionMap.UpdatePositionAtIndex(j, positionMap.GetPositionAtIndex(j) + priorIndel.Length);
                                    }
                                }
                            }

                            break;
                        }
                    }
                }
                else if (priorIndel.Type == AlleleCategory.Deletion)
                {
                    for (var i = positionMap.Length - 1; i >= 1; i--)
                    {
                        if (positionMap.GetPositionAtIndex(i) == priorIndel.ReferencePosition + priorIndel.Length + 1) //deletions must be fully anchored to be observed
                        {
                            foundIndel = true;

                            deletionPositionInRead = i - 1;
                            // offset positions before the deletion (we are walking backwards)
                            for (var j = i - 1; j >= 0; j--)
                            {
                                if (positionMap.GetPositionAtIndex(j) != -1) // preserve existing insertions
                                {
                                    anyPositionsAfterDeletionMapped = true;
                                    positionMap.UpdatePositionAtIndex(j, positionMap.GetPositionAtIndex(j) - priorIndel.Length);
                                }
                            }

                            break;
                        }
                    }
                }
            }

            // Reject when the indel position was not found, when a deletion shifted no mapped bases at all,
            // or when Helper.IsValidMap does not accept the updated map.
            //if (!foundIndel || !Helper.IsValidMap(positionMap, refSequence))
            //TODO changed this just for tailor
            if (!foundIndel || (priorIndel.Type == AlleleCategory.Deletion && !anyPositionsAfterDeletionMapped) || !Helper.IsValidMap(positionMap.Map))
            {
                return(null);
            }

            var isSketchy = false;

            if (priorIndel.IsRepeat)
            {
                //if (priorIndel.Type == AlleleCategory.Deletion)
                //{
                //    if (Helper.RepeatDeletionFlankedByRepeats(readSequence, priorIndel, deletionPositionInRead))
                //    {
                //        return null;
                //    }
                //}

                //// TODO in the case of using sketchy anchor test:
                //// Ideally, we'd check the anchor length against how many repeats are in the reference vs the variant,
                //// ... Or maybe just always check the whole anchor if it's a repeat.
                // Anchor length = the smaller of the indel's distance from the read start and from the read end.
                var anchorLength = priorIndel.Type == AlleleCategory.Insertion ? Math.Min(insertionPostionInReadStart, readSequence.Length - insertionPositionInReadEnd) : Math.Min(deletionPositionInRead, readSequence.Length - deletionPositionInRead);
                if (anchorLength >= readSequence.Length)
                {
                    throw new Exception("Anchor should never be longer than read length."); // TODO remove after dev.
                }
                // Only short anchors (below max(10, indel length)) are examined for sketchiness.
                // A sketchy anchor rejects the realignment unless pairSpecific, in which case it is only flagged.
                if (anchorLength < Math.Max(10, priorIndel.Length))
                {
                    if (priorIndel.Type == AlleleCategory.Deletion)
                    {
                        if (Helper.DeletionHasSketchyAnchor(readSequence, priorIndel, deletionPositionInRead))
                        {
                            if (pairSpecific)
                            {
                                isSketchy = true;
                            }
                            else
                            {
                                return(null);
                            }
                        }
                    }
                    else
                    {
                        if (priorIndel.NumBasesInReferenceSuffixBeforeUnique >= anchorLength)
                        {
                            if (pairSpecific)
                            {
                                isSketchy = true;
                            }
                            else
                            {
                                return(null);
                            }
                        }
                    }
                }
            }

            // TODO do we need to be more nuanced about this and only do it in duplication areas?
            // When fewer read bases remain after the deletion point than the deletion is long, and those bases
            // equal the start of the deleted reference bases, reject the deletion.
            // NOTE(review): presumably because the read tail then matches the reference just as well without the deletion - confirm.
            if (priorIndel.Type == AlleleCategory.Deletion)
            {
                var anchorStart       = deletionPositionInRead + 1;
                var rightAnchorLength = readSequence.Length - anchorStart;
                if (rightAnchorLength < priorIndel.Length)
                {
                    if (anchorStart < readSequence.Length)
                    {
                        if (readSequence.Substring(anchorStart) ==
                            priorIndel.ReferenceAllele.Substring(1, rightAnchorLength))
                        {
                            return(null);
                        }
                    }
                }
            }

            if (priorIndel.IsDuplication && priorIndel.Type == AlleleCategory.Insertion)
            {
                // TODO return to this - I think the thought was to prevent FP dups, but the implementation may have been wrong
                // No partial duplications?
                //if (readSequence.Length - insertionPositionInReadEnd <= priorIndel.Length)

                // Reject duplications whose insertion ends within 3 bases of the read end.
                if (readSequence.Length - insertionPositionInReadEnd <= 3)
                {
                    // Assumes priors are left-aligned
                    return(null);
                }
            }

            //verify insertion matches
            var newReadSequence = readSequence;
            var nifiedAt        = new List <int>();

            if (priorIndel.Type == AlleleCategory.Insertion)
            {
                if (insertionPostionInReadStart == -1 || insertionPositionInReadEnd == -1)
                {
                    return(null); // weird, this shouldn't ever happen
                }
                var readInsertedSequence = readSequence.Substring(insertionPostionInReadStart,
                                                                  insertionPositionInReadEnd - insertionPostionInReadStart + 1);

                // Inserted bases of the prior: the alternate allele without its first base.
                var indelSequence = priorIndel.AlternateAllele.Substring(1);

                if (anchorLeft && readInsertedSequence.Length < indelSequence.Length && priorIndel.NumApproxDupsRight > 0)
                {
                    // Don't allow partial realignment to dups
                    return(null);
                }
                if (!anchorLeft && readInsertedSequence.Length < indelSequence.Length && priorIndel.NumApproxDupsLeft > 0)
                {
                    // Don't allow partial realignment to dups
                    return(null);
                }

                // The read may hold only part of the insertion; compare against the equally long portion of the
                // prior's inserted bases: the leading part when anchored left, the trailing part otherwise.
                var clippedPriorSequence = anchorLeft
                    ? indelSequence.Substring(0, readInsertedSequence.Length)
                    : indelSequence.Substring(indelSequence.Length - readInsertedSequence.Length);

                var isMismatch = readInsertedSequence != clippedPriorSequence;
                if (isMismatch)
                {
                    int?mismatches     = null;
                    var mismatchesToDq = 0d;
                    // Mismatches are tolerated only for insertions of at least _minInsertionSizeToAllowMismatchingBases
                    // bases with no approximate duplications on either side, and only up to
                    // priorIndel.Length * _maxProportionInsertSequenceMismatch mismatching bases.
                    if (priorIndel.Length >= _minInsertionSizeToAllowMismatchingBases && !(priorIndel.NumApproxDupsLeft + priorIndel.NumApproxDupsRight > 0))
                    {
                        mismatches = Helper.GetHammingNumMismatches(readInsertedSequence, clippedPriorSequence);

                        mismatchesToDq = priorIndel.Length * _maxProportionInsertSequenceMismatch;

                        if (mismatches > mismatchesToDq)
                        {
                            //Console.WriteLine(
                            //    $"Too many mismatches between insertions: {mismatches} > {maxAllowedMismatches} ({clippedPriorSequence} vs {readInsertedSequence})");
                        }
                        else
                        {
                            //Console.WriteLine(
                            //    $"Able to Nify mismatches between insertions: {mismatches} <= {maxAllowedMismatches} ({clippedPriorSequence} vs {readInsertedSequence})");

                            var newSequence =
                                Helper.NifyMismatches(clippedPriorSequence, readInsertedSequence, nifiedAt);
                            // TODO PERF is this actually necessary now that we're not actually Nifying? We can just keep the bases that we're Nifying at.
                            // Splice the lower-cased NifyMismatches output in place of the inserted segment.
                            newReadSequence = readSequence.Substring(0, insertionPostionInReadStart) +
                                              newSequence.ToLower() +
                                              readSequence.Substring(insertionPositionInReadEnd + 1);
                            // Offset the recorded indexes by the insertion's start index in the read.
                            nifiedAt = nifiedAt.Select(x => x + insertionPostionInReadStart).ToList();
                        }
                    }

                    // Mismatches were either not allowed at all (null) or exceeded the allowed amount.
                    if (mismatches == null || (mismatches > mismatchesToDq))
                    {
                        return(null); // inserted sequence doesn't match read
                    }
                }
            }

            // TODO update to use PositionMap class
            var newCigar = Helper.ConstructCigar(positionMap.Map);

            // TODO moved this, and probably should in original Hygea too?
            // Also, can cut down the calls to positionmap.First() in the original
            //var readHasPosition = positionMap.Any(p => p > 0); // Position map is one-based, so should be >, not >= 0.
            if (!positionMap.HasAnyMappableBases())
            {
                throw new InvalidDataException(string.Format("Trying to generate result and read does not have any alignable bases. ({0}, {1})", newCigar, string.Join(",", positionMap)));
            }

            // Position map is one-based (see note above); subtract 1 for a zero-based start index,
            // then make it relative to the reference snippet.
            var startIndexInReference          = positionMap.FirstMappableBase() - 1;
            var startIndexInRefSequenceSnippet = startIndexInReference - refSequenceStartIndex;

            var newSummary = Extensions.GetAlignmentSummary(startIndexInRefSequenceSnippet, newCigar, refSequence,
                                                            newReadSequence, _trackActualMismatches, _checkSoftclipsForMismatches);

            if (newSummary == null)
            {
                return(null);
            }

            return(new RealignmentResult()
            {
                Cigar = newCigar,
                NumIndels = newCigar.NumIndels(),
                Position = startIndexInReference + 1, // back to one-based
                NumMismatches = newSummary.NumMismatches,
                NumNonNMismatches = newSummary.NumNonNMismatches,
                NumSoftclips = newSummary.NumSoftclips,
                NumNonNSoftclips = newSummary.NumNonNSoftclips,
                NumDeletedBases = newSummary.NumDeletedBases,
                NumInsertedBases = newSummary.NumInsertedBases,
                NumMatches = newSummary.NumMatches,
                NumIndelBases = newSummary.NumIndelBases,
                NumMismatchesIncludeSoftclip = newSummary.NumMismatchesIncludeSoftclip,
                MismatchesIncludeSoftclip = newSummary.MismatchesIncludeSoftclip,
                Indels = StringifyIndel(priorIndel),
                NifiedAt = nifiedAt,
                // Read index at which the indel was placed: insertion start, or the base preceding the deletion.
                IndelsAddedAt = new List <int> {
                    priorIndel.Type == AlleleCategory.Insertion ? insertionPostionInReadStart : deletionPositionInRead
                },
                IsSketchy = isSketchy
            });
        }
        /// <summary>
        /// Exercises RealignmentEvaluator.GetFinalAlignment against mocked collaborators for three scenarios:
        /// no candidate indels (realigner never called), a realignment the judger accepts, and a realignment
        /// the judger rejects with nothing to softclip.
        /// </summary>
        public void GetFinalAlignment()
        {
            var callbackIndelsList = new List <HashableIndel>();
            var mockIndelSource    = GetMockIndelSource(new List <HashableIndel>());
            var mockStatusHandler  = new Mock <IStatusHandler>();
            var readRealigner      = GetMockReadRealigner(null, callbackIndelsList);
            var realignmentJudger  = GetMockJudger(false, true, false);
            var filterer           = GetMockRegionFilterer();

            // Builds an evaluator from whatever mocks are current at the time of the call
            // (the captured locals are reassigned between scenarios).
            RealignmentEvaluator BuildEvaluator()
            {
                return new RealignmentEvaluator(mockIndelSource.Object, mockStatusHandler.Object, readRealigner.Object, realignmentJudger.Object, "chr1", true, true, true, true, filterer.Object, true);
            }

            // Checks how often the current realigner mock was asked to realign, regardless of arguments.
            void VerifyRealignCalls(Times expectedCalls)
            {
                readRealigner.Verify(x => x.Realign(It.IsAny <Read>(), It.IsAny <List <HashableIndel> >(), It.IsAny <Dictionary <HashableIndel, GenomeSnippet> >(), It.IsAny <bool>(), It.IsAny <int>()), expectedCalls);
            }

            var evaluator = BuildEvaluator();

            // No indels to realign around, no need to call realignment
            var pair = TestHelpers.GetPair("5M1I5M", "5M1I5M");
            // Only consumed by the disabled "force softclip" scenario further down.
            var existingIndels = new List <PreIndel>()
            {
                new PreIndel(new CandidateAllele("chr1", 100, "A", "ATG", AlleleCategory.Insertion))
            };
            var alignment = evaluator.GetFinalAlignment(pair.Read1, out bool realigned, out bool forcedSoftclip, out bool confirmed, out bool sketchy, existingIndels: new List <PreIndel>());

            VerifyRealignCalls(Times.Never());
            Assert.False(realigned);
            Assert.False(forcedSoftclip);
            Assert.Equal(pair.Read1, alignment); // expected first, actual second
            Assert.Equal("5M1I5M", alignment.CigarData.ToString());
            Assert.Empty(callbackIndelsList);

            var indel = new HashableIndel()
            {
                ReferencePosition = 100,
                AlternateAllele   = "A",
                ReferenceAllele   = "ATT"
            };
            var indel2 = new HashableIndel()
            {
                ReferencePosition = 100,
                AlternateAllele   = "AAA",
                ReferenceAllele   = "A"
            };

            // Has indel to realign around and it failed, and it has a different indel, force softclip
            //pair = TestHelpers.GetPair("5M1I5M", "5M1I5M");
            //callbackIndelsList = new List<HashableIndel>();
            //readRealigner = GetMockReadRealigner(null, callbackIndelsList);
            //mockIndelSource = GetMockIndelSource(new List<HashableIndel>() {indel, indel2});

            //evaluator = new RealignmentEvaluator(mockIndelSource.Object, mockStatusHandler.Object, readRealigner.Object, realignmentJudger.Object, "chr1", true, true, true, true, filterer.Object, true);
            //alignment = evaluator.GetFinalAlignment(pair.Read1, out realigned, out forcedSoftclip, out confirmed, existingIndels: existingIndels);
            //readRealigner.Verify(x => x.Realign(It.IsAny<Read>(), It.IsAny<List<HashableIndel>>(),
            //    It.IsAny<Dictionary<HashableIndel, GenomeSnippet>>(), It.IsAny<bool>(), It.IsAny<int>()), Times.Once);
            //Assert.False(realigned);
            //Assert.True(forcedSoftclip);
            //Assert.Equal(alignment, pair.Read1);
            //Assert.Equal("5M6S", alignment.CigarData.ToString());
            //Assert.Equal(2, callbackIndelsList.Count); // Check indels passed to realigner

            // Has indel to realign around and it succeeds
            pair = TestHelpers.GetPair("5M1I5M", "5M1I5M");
            callbackIndelsList = new List <HashableIndel>();
            mockIndelSource    = GetMockIndelSource(new List <HashableIndel>()
            {
                indel, indel2
            });
            readRealigner = GetMockReadRealigner(new RealignmentResult()
            {
                AcceptedIndels = new List <int> {
                    1
                },
                Cigar = new CigarAlignment("4M1I6M"), NumMismatchesIncludeSoftclip = 0, Indels = "blah"
            }, callbackIndelsList);
            realignmentJudger = GetMockJudger(true, false, false);
            evaluator         = BuildEvaluator();
            alignment         = evaluator.GetFinalAlignment(pair.Read1, out realigned, out forcedSoftclip, out confirmed, out sketchy);
            VerifyRealignCalls(Times.Once());
            Assert.True(realigned);
            Assert.False(forcedSoftclip);
            Assert.Equal("4M1I6M", alignment.CigarData.ToString());
            Assert.Equal(2, callbackIndelsList.Count); // Check indels passed to realigner

            // Has indel to realign around but not good enough. Also nothing to softclip.
            pair = TestHelpers.GetPair("11M", "11M");
            callbackIndelsList = new List <HashableIndel>();
            mockIndelSource    = GetMockIndelSource(new List <HashableIndel>()
            {
                indel, indel2
            });
            readRealigner = GetMockReadRealigner(new RealignmentResult()
            {
                Cigar = new CigarAlignment("4M1I6M"), NumMismatchesIncludeSoftclip = 0, Indels = "blah"
            }, callbackIndelsList);
            realignmentJudger = GetMockJudger(false, false, true);
            evaluator         = BuildEvaluator();
            alignment         = evaluator.GetFinalAlignment(pair.Read1, out realigned, out forcedSoftclip, out confirmed, out sketchy);
            VerifyRealignCalls(Times.Once());
            Assert.False(realigned);
            Assert.False(forcedSoftclip);
            Assert.Equal("11M", alignment.CigarData.ToString());
            Assert.Equal(2, callbackIndelsList.Count); // Check indels passed to realigner

            //// Same as above: has indel to realign around but not good enough. Also nothing to softclip. But this time, it's (mocked) pair aware.
            //pair = TestHelpers.GetPair("11M", "11M");
            //callbackIndelsList = new List<HashableIndel>();
            //mockIndelSource = GetMockIndelSource(new List<HashableIndel>() { indel, indel2 });
            //readRealigner = GetMockReadRealigner(new RealignmentResult()
            //{
            //    AcceptedIndels = new List<int>() { 0},
            //    Cigar = new CigarAlignment("4M1I6M"), NumMismatchesIncludeSoftclip = 0, Indels = "blah"
            //}, callbackIndelsList);
            //realignmentJudger = GetMockJudger(false, false, true);
            //evaluator = new RealignmentEvaluator(mockIndelSource.Object, mockStatusHandler.Object, readRealigner.Object, realignmentJudger.Object, "chr1", true, true, true, true,filterer.Object, true);
            //alignment = evaluator.GetFinalAlignment(pair.Read1, out realigned, out forcedSoftclip, out confirmed, selectedIndels: new List<PreIndel>() { new PreIndel(new CandidateAllele("chr1", 100, "A", "ATC", AlleleCategory.Insertion)) });
            //readRealigner.Verify(x => x.Realign(It.IsAny<Read>(), It.IsAny<List<HashableIndel>>(), It.IsAny<Dictionary<HashableIndel, GenomeSnippet>>(), It.IsAny<bool>(), It.IsAny<int>()), Times.Once);
            //Assert.True(realigned);
            //Assert.False(forcedSoftclip);
            //Assert.Equal("4M1I6M", alignment.CigarData.ToString());
            //Assert.Equal(2, callbackIndelsList.Count);
        }
        /// <summary>
        /// Exercises <c>ReadPairRealignerAndCombiner.ExtractReads</c> with a mocked evaluator and a mocked
        /// restitcher. Covers: mate positions after the evaluator moves a read, the cases in which base
        /// qualities are zeroed ("silenced"), the stitched single-read result, and the fact that indel
        /// outcomes only reach the master lookup once <c>Finish</c> is called.
        /// </summary>
        public void GetFinalReads()
        {
            var  mockIndelFinder  = new Mock <IPairSpecificIndelFinder>();
            var  mockEvaluator    = new Mock <IRealignmentEvaluator>();
            var  mockNmCalculator = new Mock <INmCalculator>();
            bool realigned        = true;
            bool softclipped      = false;
            bool confirmed        = false;
            bool sketchy          = false;

            // Mocked evaluator: hands back a copy of the input alignment, moved to position 10 when the
            // read is on the reverse strand and left where it was otherwise. The out flags take the
            // values of the locals above.
            mockEvaluator
            .Setup(x => x.GetFinalAlignment(It.IsAny <BamAlignment>(), out realigned, out softclipped, out confirmed, out sketchy,
                                            It.IsAny <List <PreIndel> >(), It.IsAny <List <PreIndel> >(), It.IsAny <bool>(),
                                            It.IsAny <List <HashableIndel> >(), It.IsAny <List <PreIndel> >()))
            .Returns <BamAlignment, bool, bool, bool, bool, List <PreIndel>, List <PreIndel>, bool, List <HashableIndel>,
                      List <PreIndel> >((b, r, sc, conf, s, i, i2, z, c, mateIndels) =>
            {
                return(new BamAlignment(b)
                {
                    Position = b.IsReverseStrand() ? 10 : b.Position
                });
            });
            var hashable = new HashableIndel()
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "AT",
                Type = AlleleCategory.Insertion
            };

            // A single indel outcome entry, used by the master-outcomes check at the end of the test.
            mockEvaluator.Setup(x => x.GetIndelOutcomes()).Returns(new Dictionary <HashableIndel, int[]>()
            {
                { hashable, new int[] { 0, 1, 2, 3, 4, 5, 6 } }
            });
            var mockReadRestitcher = new Mock <IReadRestitcher>();

            var masterLookup         = new Dictionary <string, IndelEvidence>();
            var masterOutcomesLookup = new Dictionary <HashableIndel, int[]>();
            var pairRealigner        = new ReadPairRealignerAndCombiner(new SnowballEvidenceCollector(new IndelTargetFinder()),
                                                                        mockReadRestitcher.Object,
                                                                        mockEvaluator.Object,
                                                                        mockIndelFinder.Object, "chr1", false, masterLookup: masterLookup,
                                                                        masterOutcomesLookup: masterOutcomesLookup);

            var unpairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));

            unpairedMates.ReadPair.PairStatus = PairStatus.SplitQuality;

            // Non-paired
            var reads = pairRealigner.ExtractReads(unpairedMates, mockNmCalculator.Object);

            Assert.Equal(2, reads.Count);
            // Should set realigned position as mate positions
            Assert.Equal(10, reads[0].MatePosition);
            Assert.Equal(99, reads[1].MatePosition);

            // Paired but fail re-stitching: the restitcher hands back both mates unmerged.
            // This setup stays in effect for every case below until it is replaced for the
            // stitchable pair, so it is registered only once.
            var pairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));

            mockReadRestitcher.Setup(x => x.GetRestitchedReads(It.IsAny <ReadPair>(), It.IsAny <BamAlignment>(),
                                                               It.IsAny <BamAlignment>(), It.IsAny <int?>(), It.IsAny <int?>(), It.IsAny <bool>(),
                                                               It.IsAny <INmCalculator>(), It.IsAny <bool>(), It.IsAny <bool>()))
            .Returns <ReadPair, BamAlignment, BamAlignment, int?, int?, bool, INmCalculator, bool, bool>(
                (p, b1, b2, n1, n2, r, nc, doRecalc, s) =>
                new List <BamAlignment>()
            {
                b1, b2
            });
            reads = pairRealigner.ExtractReads(pairedMates, mockNmCalculator.Object);
            Assert.Equal(2, reads.Count);
            // Should set realigned position as mate positions
            Assert.Equal(10, reads[0].MatePosition);
            Assert.Equal(99, reads[1].MatePosition);

            // Silencing cases.
            // NOTE(review): judging by the expected qualities below, the third ExtractReads argument
            // switches realignment on/off and the fourth selects the reads to silence
            // (1 = R1, 2 = R2, 3 = both) - confirm against the ExtractReads signature.

            // Instructed to silence both reads, but was realigned, so don't silence
            pairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));
            reads = pairRealigner.ExtractReads(pairedMates, mockNmCalculator.Object, true, 3);
            Assert.Equal(2, reads.Count);
            Assert.True(reads[0].Qualities.All(x => x == 30));
            Assert.True(reads[1].Qualities.All(x => x == 30));

            // Instructed to silence both reads, was not realigned, so silence both
            pairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));
            reads = pairRealigner.ExtractReads(pairedMates, mockNmCalculator.Object, false, 3);
            Assert.Equal(2, reads.Count);
            Assert.True(reads[0].Qualities.All(x => x == 0));
            Assert.True(reads[1].Qualities.All(x => x == 0));

            // Instructed to silence R1, but was realigned, so don't silence
            pairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));
            reads = pairRealigner.ExtractReads(pairedMates, mockNmCalculator.Object, true, 1);
            Assert.Equal(2, reads.Count);
            Assert.True(reads[0].Qualities.All(x => x == 30));
            Assert.True(reads[1].Qualities.All(x => x == 30));

            // Instructed to silence R1, was not realigned, so silence R1 only
            pairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));
            reads = pairRealigner.ExtractReads(pairedMates, mockNmCalculator.Object, false, 1);
            Assert.Equal(2, reads.Count);
            Assert.True(reads[0].Qualities.All(x => x == 0));
            Assert.True(reads[1].Qualities.All(x => x == 30));

            // Instructed to silence R2, but was realigned, so don't silence
            pairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));
            reads = pairRealigner.ExtractReads(pairedMates, mockNmCalculator.Object, true, 2);
            Assert.Equal(2, reads.Count);
            Assert.True(reads[0].Qualities.All(x => x == 30));
            Assert.True(reads[1].Qualities.All(x => x == 30));

            // Instructed to silence R2, was not realigned, so silence R2 only
            pairedMates = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));
            reads = pairRealigner.ExtractReads(pairedMates, mockNmCalculator.Object, false, 2);
            Assert.Equal(2, reads.Count);
            Assert.True(reads[0].Qualities.All(x => x == 30));
            Assert.True(reads[1].Qualities.All(x => x == 0));


            // Paired and succeed re-stitching: the restitcher now returns a single read.
            var pairedMatesStitchable = GetPairResult(TestHelpers.GetPair("5M1I5M", "5M1I5M"));

            mockReadRestitcher.Setup(x => x.GetRestitchedReads(It.IsAny <ReadPair>(), It.IsAny <BamAlignment>(),
                                                               It.IsAny <BamAlignment>(), It.IsAny <int?>(), It.IsAny <int?>(), It.IsAny <bool>(),
                                                               It.IsAny <INmCalculator>(), It.IsAny <bool>(), It.IsAny <bool>()))
            .Returns <ReadPair, BamAlignment, BamAlignment, int?, int?, bool, INmCalculator, bool, bool>(
                (p, b1, b2, n1, n2, r, nc, doRecalc, s) =>
                new List <BamAlignment>()
            {
                b1
            });
            reads = pairRealigner.ExtractReads(pairedMatesStitchable, mockNmCalculator.Object);
            Assert.Single(reads);
            Assert.Equal(-1, reads[0].MatePosition);

            // Master outcomes gets added to at the end
            Assert.Empty(masterOutcomesLookup);
            pairRealigner.Finish();
            Assert.Single(masterOutcomesLookup);
        }
Пример #24
0
        /// <summary>
        /// Checks <c>Helper.DeletionHasSketchyAnchor</c> for repeat deletions whose repeat units are
        /// one, two, three and seven bases long, across a range of read sequences and positions.
        /// </summary>
        public void DeletionHasSketchyAnchor()
        {
            // Reads that are probed at more than one position.
            const string cPrefixThenARun = "CCCAAAA";
            const string shortTRichRead  = "CTTTTATTA";
            const string threeBaseRead   = "GTA";
            const string longTRichRead   = "TTTGCTATCAATCACAGGTATACAAGTACTTGCCTTTACTCCTGCATGTAGAAGACTCTTATGAGCGAGATAATGCAGAGAAGGCCTTTCATATAAATT";
            const string longTcaRead     = "CCATTCTGATTTGACTTTTGTGCATCTTTGGCTCGAGTATCTCATATAGATTACTCGTGCTTTTCTTCAGCTTCCTCATCATCAAAATCTTTATCATTTT";

            // Deletion with the one-base repeat unit "A".
            var homopolymerA = new HashableIndel
            {
                AlternateAllele = "T",
                ReferenceAllele = "TAAAA",
                IsRepeat        = true,
                RepeatUnit      = "A"
            };

            // Deletion with the two-base repeat unit "AC".
            var dinucleotideAc = new HashableIndel
            {
                AlternateAllele = "T",
                ReferenceAllele = "TACAC",
                IsRepeat        = true,
                RepeatUnit      = "AC"
            };

            Assert.True(Helper.DeletionHasSketchyAnchor("ACCCCC", homopolymerA, 0));
            Assert.True(Helper.DeletionHasSketchyAnchor("AACCCCC", homopolymerA, 0));
            Assert.True(Helper.DeletionHasSketchyAnchor("AAAAACCCCC", homopolymerA, 0));
            Assert.True(Helper.DeletionHasSketchyAnchor("AAACCCCC", homopolymerA, 1));
            Assert.True(Helper.DeletionHasSketchyAnchor("CAAAA", homopolymerA, 0));
            Assert.True(Helper.DeletionHasSketchyAnchor(cPrefixThenARun, homopolymerA, 2));
            // Disabled check: Assert.False(Helper.DeletionHasSketchyAnchor(cPrefixThenARun, homopolymerA, 0));
            Assert.False(Helper.DeletionHasSketchyAnchor(cPrefixThenARun, homopolymerA, 1));
            Assert.False(Helper.DeletionHasSketchyAnchor("CTTTT", homopolymerA, 0));
            Assert.False(Helper.DeletionHasSketchyAnchor("TAAAAT", homopolymerA, 0));

            Assert.True(Helper.DeletionHasSketchyAnchor("CCCCTA", homopolymerA, 4));
            Assert.False(Helper.DeletionHasSketchyAnchor("CCCCTAT", homopolymerA, 4));
            Assert.False(Helper.DeletionHasSketchyAnchor("TCCCCC", homopolymerA, 0));
            Assert.True(Helper.DeletionHasSketchyAnchor("TTTACACACAC", dinucleotideAc, 2));
            Assert.False(Helper.DeletionHasSketchyAnchor("TTTACACACACT", dinucleotideAc, 2));
            Assert.True(Helper.DeletionHasSketchyAnchor("TTTACACACA", dinucleotideAc, 2));
            Assert.True(Helper.DeletionHasSketchyAnchor("ACACACAC", dinucleotideAc, 1));

            // Deletion with the one-base repeat unit "T", probed inside a 100-base read.
            var homopolymerT = new HashableIndel
            {
                AlternateAllele = "T",
                ReferenceAllele = "TTTTA",
                IsRepeat        = true,
                RepeatUnit      = "T"
            };

            // Deletion with the three-base repeat unit "TCA".
            var trinucleotideTca = new HashableIndel
            {
                AlternateAllele = "T",
                ReferenceAllele = "TTCATCA",
                IsRepeat        = true,
                RepeatUnit      = "TCA"
            };

            Assert.True(Helper.DeletionHasSketchyAnchor(longTRichRead, homopolymerT, 2));

            // Same read, four neighbouring positions at its end: only position 98 is expected to be sketchy.
            Assert.True(Helper.DeletionHasSketchyAnchor(longTcaRead, trinucleotideTca, 98));
            Assert.False(Helper.DeletionHasSketchyAnchor(longTcaRead, trinucleotideTca, 97));
            Assert.False(Helper.DeletionHasSketchyAnchor(longTcaRead, trinucleotideTca, 96));
            Assert.False(Helper.DeletionHasSketchyAnchor(longTcaRead, trinucleotideTca, 99));

            // Deletion with the seven-base repeat unit "TCAGTCG"; none of the short reads below are expected to be sketchy.
            var sevenBaseUnit = new HashableIndel
            {
                AlternateAllele = "T",
                ReferenceAllele = "TTCAGTCG",
                IsRepeat        = true,
                RepeatUnit      = "TCAGTCG"
            };

            Assert.False(Helper.DeletionHasSketchyAnchor(shortTRichRead, sevenBaseUnit, 1));
            Assert.False(Helper.DeletionHasSketchyAnchor(shortTRichRead, sevenBaseUnit, 0));
            Assert.False(Helper.DeletionHasSketchyAnchor(threeBaseRead, sevenBaseUnit, 1));
            Assert.False(Helper.DeletionHasSketchyAnchor(threeBaseRead, sevenBaseUnit, 0));
            Assert.False(Helper.DeletionHasSketchyAnchor(threeBaseRead, sevenBaseUnit, 2));
        }