Ejemplo n.º 1
0
        /// <summary>
        /// Rebuilds one indel from its hashable key, pools all evidence gathered for it, and runs the
        /// pooled evidence through <c>ExtractIndelsFromEntry</c>. Any indels that come back are appended
        /// to <paramref name="indelsToAdd"/>; a null result adds nothing.
        /// </summary>
        /// <param name="allowRescue">Forwarded unchanged to <c>ExtractIndelsFromEntry</c>.</param>
        /// <param name="indelToRecalculate">The hashable indel (key) and every evidence record collected for it (value).</param>
        /// <param name="statusCounter">Counter forwarded to <c>ExtractIndelsFromEntry</c>.</param>
        /// <param name="edgeThreshold">Forwarded unchanged to <c>ExtractIndelsFromEntry</c>.</param>
        /// <param name="indelsToAdd">Output list that receives the indels returned by the extraction step.</param>
        private void RecalculateIndelAndAddIfNeeded(bool allowRescue, KeyValuePair<HashableIndel, List<IndelEvidence>> indelToRecalculate,
                                                    IndelStatusCounter statusCounter, double edgeThreshold, List<PreIndel> indelsToAdd)
        {
            var source = indelToRecalculate.Key;

            // Re-create the indel from the fields stored on the hashable key.
            var candidate = new CandidateAllele(source.Chromosome, source.ReferencePosition,
                                                source.ReferenceAllele, source.AlternateAllele, source.Type);
            var rebuilt = new PreIndel(candidate)
            {
                InMulti    = source.InMulti,
                OtherIndel = source.OtherIndel
            };

            // Fold every evidence record for this indel into a single accumulator.
            var pooledEvidence = new IndelEvidence();
            foreach (var evidence in indelToRecalculate.Value)
            {
                pooledEvidence.AddIndelEvidence(evidence);
            }

            // The key string handed to the extraction step is "<indel>|<other indel>".
            var keyString     = rebuilt.ToString() + "|" + rebuilt.OtherIndel;
            var singleton     = new List<PreIndel> { rebuilt };
            var recalculated  = ExtractIndelsFromEntry(pooledEvidence, keyString, statusCounter, edgeThreshold,
                                                       allowRescue, singleton);

            if (recalculated == null)
            {
                return;
            }

            indelsToAdd.AddRange(recalculated);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Decides whether a variant should be dropped, recording the reason on
        /// <paramref name="evidence"/> and bumping the matching field of <paramref name="statusCounter"/>.
        /// </summary>
        /// <remarks>
        /// Checks run in this order:
        /// 1. Basic thresholds (observation count, average left/right anchor, average mess). A failing
        ///    variant is removed as <c>BelowThreshold</c> unless <paramref name="isStrong"/> is set, in which
        ///    case it is marked <c>Rescued</c> and still goes through the remaining checks.
        /// 2. Single-observation quality (<c>PoorSingle</c>).
        /// 3. Low-count quality for counts at or below <paramref name="edgeThreshold"/> (<c>PoorEdge</c>).
        /// </remarks>
        /// <returns><c>true</c> when the variant should be removed; otherwise <c>false</c>.</returns>
        private bool ShouldRemoveVariant(int observationCount, float avgAnchorLeft, float avgAnchorRight, bool isStrong,
                                         IndelStatusCounter statusCounter, float avgQuals, float avgMess, int anchorLeft, int anchorRight, double edgeThreshold, IndelEvidence evidence)
        {
            var failsBasicThresholds = observationCount < _foundThreshold
                                       || avgAnchorLeft < _anchorThreshold
                                       || avgAnchorRight < _anchorThreshold
                                       || avgMess > _maxMess;

            if (failsBasicThresholds)
            {
                if (!isStrong)
                {
                    evidence.Outcome = Outcome.BelowThreshold;
                    statusCounter.BelowThreshold++;
                    return true;
                }

                // Strong variants survive the basic thresholds but are still subject to the checks below.
                evidence.Outcome = Outcome.Rescued;
                statusCounter.Rescued++;
            }

            // Single-observation variants may be allowed for realignment in general,
            // but the really junky ones are filtered out here.
            var hasShortAnchor     = anchorLeft < 5 || anchorRight < 5;
            var looksLikeJunkRead  = hasShortAnchor || avgMess > 1 || avgQuals < 30;
            if (observationCount == 1 && looksLikeJunkRead)
            {
                evidence.Outcome = Outcome.PoorSingle;
                statusCounter.PoorSingle++;
                return true;
            }

            var isLowCount       = observationCount <= edgeThreshold;
            var isPoorQuality    = avgMess > 2 || avgQuals < 25;
            if (isLowCount && isPoorQuality)
            {
                evidence.Outcome = Outcome.PoorEdge;
                statusCounter.PoorEdge++;
                return true;
            }

            return false;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Filters the collected indel evidence and returns the indels that passed, grouped by chromosome.
        /// </summary>
        /// <remarks>
        /// Side effects on <paramref name="indelStringLookup"/>:
        /// entries skipped early get their <c>Outcome</c> set (<c>LowObservations</c> or
        /// <c>LowReputableSupport</c>), and single-indel entries for which <c>ExtractIndelsFromEntry</c>
        /// returns null are removed from the dictionary after the scan completes.
        /// Entries whose key resolves to more than one indel are not evaluated directly; their evidence is
        /// grouped per individual indel and evaluated afterwards through
        /// <c>RecalculateIndelAndAddIfNeeded</c>.
        /// </remarks>
        /// <param name="indelStringLookup">Evidence keyed by indel key string.</param>
        /// <param name="allowRescue">Forwarded to the extraction step.</param>
        /// <param name="regionEdgeThreshold">
        /// Entries whose <c>Position</c> is at or beyond this value bypass the early
        /// low-observation and low-reputable-support skips. Defaults to <see cref="int.MaxValue"/>.
        /// </param>
        /// <returns>Surviving indels, keyed by <c>Chromosome</c>.</returns>
        public Dictionary<string, List<PreIndel>> GetRealignablePreIndels(Dictionary<string, IndelEvidence> indelStringLookup, bool allowRescue, int regionEdgeThreshold = int.MaxValue)
        {
            var statusCounter            = new IndelStatusCounter();
            var edgeThreshold            = Math.Max(_foundThreshold + 1, _foundThreshold * 1.5);
            var indelsToAdd              = new List<PreIndel>();
            var multiIndelsToRecalculate = new Dictionary<HashableIndel, List<IndelEvidence>>();
            var indelsToRemove           = new List<string>();

            var numImmediatelySkipped = 0;

            // TODO different way of doing this, bc we don't use the indel after that
            var indelsLookup = new Dictionary<string, List<PreIndel>>();

            // Enumerate key/value pairs directly rather than re-indexing the dictionary for every key.
            // The dictionary itself is not modified inside this loop; removals are deferred via indelsToRemove.
            foreach (var entry in indelStringLookup)
            {
                var key               = entry.Key;
                var indelMetrics      = entry.Value;
                var keepForNextRegion = indelMetrics.Position >= regionEdgeThreshold;

                // Zero-observation entries are dropped without being counted as skipped.
                if (indelMetrics.Observations == 0 && !keepForNextRegion)
                {
                    indelMetrics.Outcome = Outcome.LowObservations;
                    continue;
                }

                if (indelMetrics.Observations < _strictFoundThreshold && !keepForNextRegion)
                {
                    indelMetrics.Outcome = Outcome.LowObservations;
                    numImmediatelySkipped++;
                    continue;
                }

                // No reputable evidence!
                if (indelMetrics.ReputableSupport < 1 && !keepForNextRegion)
                {
                    indelMetrics.Outcome = Outcome.LowReputableSupport;
                    numImmediatelySkipped++;
                    continue;
                }

                var entryIndelKeys = ExtractIndelsFromKeyString(key);
                if (entryIndelKeys == null)
                {
                    continue;
                }

                if (entryIndelKeys.Count > 1)
                {
                    // Multi-indel entry: credit this evidence to each component indel and evaluate later.
                    foreach (var entryIndel in entryIndelKeys)
                    {
                        var multiKey = GetHashableIndel(entryIndel);

                        if (!multiIndelsToRecalculate.TryGetValue(multiKey, out var existingIndelMetrics))
                        {
                            existingIndelMetrics = new List<IndelEvidence>();
                            multiIndelsToRecalculate[multiKey] = existingIndelMetrics;
                        }
                        existingIndelMetrics.Add(indelMetrics);
                    }
                }
                else
                {
                    var entryIndels = ExtractIndelsFromEntry(indelMetrics, key, statusCounter, edgeThreshold,
                                                             allowRescue, entryIndelKeys);
                    if (entryIndels != null)
                    {
                        indelsToAdd.AddRange(entryIndels);
                    }
                    else
                    {
                        // Rejected: schedule removal once enumeration of the lookup has finished.
                        indelsToRemove.Add(key);
                    }
                }
            }

            foreach (var indelToRecalculate in multiIndelsToRecalculate)
            {
                RecalculateIndelAndAddIfNeeded(allowRescue, indelToRecalculate, statusCounter, edgeThreshold, indelsToAdd);
            }

            // Group the surviving indels by chromosome.
            foreach (var indel in indelsToAdd)
            {
                if (!indelsLookup.TryGetValue(indel.Chromosome, out var indelsForChrom))
                {
                    indelsForChrom = new List<PreIndel>();
                    indelsLookup.Add(indel.Chromosome, indelsForChrom);
                }
                indelsForChrom.Add(indel);
            }

            foreach (var badIndel in indelsToRemove)
            {
                indelStringLookup.Remove(badIndel);
            }

            // Entries skipped before extraction are tallied as below-threshold.
            statusCounter.BelowThreshold += numImmediatelySkipped;
            return indelsLookup;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds the indel(s) for one evidence entry via <c>GetIndelFromEntry</c> and applies the
        /// keep/remove filters to the entry.
        /// </summary>
        /// <remarks>
        /// One indel (or an empty list, which fails on indexing) takes the single-indel path.
        /// Exactly two indels are built as a linked pair: both get <c>InMulti = true</c> and each
        /// other's string form in <c>OtherIndel</c>. More than two indels are logged and produce no
        /// output indels, but the filters below still run for the entry.
        /// </remarks>
        /// <param name="indelMetrics">Evidence totals for the entry; its <c>Outcome</c> is set when the entry is rejected.</param>
        /// <param name="keyString">Key string for the entry; used in the log message and passed to <c>IsStrong</c>.</param>
        /// <param name="statusCounter">Counter updated by <c>ShouldRemoveVariant</c>; <c>Kept</c> is incremented when the entry passes.</param>
        /// <param name="edgeThreshold">Forwarded to <c>ShouldRemoveVariant</c>.</param>
        /// <param name="allowRescue">When false, <c>IsStrong</c> is not consulted and the entry is treated as not strong.</param>
        /// <param name="indels">The indels parsed from the entry's key.</param>
        /// <returns>The built indels when the entry is kept; <c>null</c> when it is rejected.</returns>
        private List<PreIndel> ExtractIndelsFromEntry(IndelEvidence indelMetrics, string keyString,
                                                      IndelStatusCounter statusCounter,
                                                      double edgeThreshold, bool allowRescue, List<PreIndel> indels)
        {
            var observationCount = indelMetrics.Observations;
            var divisor          = (float)observationCount;

            // Raw totals straight off the evidence record.
            var anchorLeft              = indelMetrics.LeftAnchor;
            var anchorRight             = indelMetrics.RightAnchor;
            var mess                    = indelMetrics.Mess;
            var quals                   = indelMetrics.Quality;
            var numFromUnanchoredRepeat = indelMetrics.IsRepeat;
            var numFromMateUnmapped     = indelMetrics.IsSplit;

            // Per-observation fractions and averages.
            var fwdSupport               = indelMetrics.Forward / divisor;
            var reverseSupport           = indelMetrics.Reverse / divisor;
            var stitchedSupport          = indelMetrics.Stitched / divisor;
            var reputableSupportFraction = indelMetrics.ReputableSupport / divisor;
            var avgAnchorLeft            = anchorLeft / divisor;
            var avgAnchorRight           = anchorRight / divisor;
            var avgQuals                 = quals / divisor;
            var avgMess                  = mess / divisor;

            // TODO clean this up, no more magic
            // IsStrong is only consulted when rescue is allowed.
            var isStrong = allowRescue
                           && IsStrong(avgQuals, reputableSupportFraction, avgAnchorLeft, avgMess, avgAnchorRight,
                                       reverseSupport, observationCount, fwdSupport, keyString, stitchedSupport);

            var built      = new List<PreIndel>();
            var indelCount = indels.Count;

            if (indelCount <= 1)
            {
                var single = GetIndelFromEntry(indels[0], anchorLeft, anchorRight, observationCount, mess, fwdSupport,
                                               reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport, numFromMateUnmapped, numFromUnanchoredRepeat);

                built.Add(single);
            }
            else if (indelCount == 2)
            {
                var first = GetIndelFromEntry(indels[0], anchorLeft, anchorRight, observationCount, mess, fwdSupport,
                                              reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport, numFromMateUnmapped, numFromUnanchoredRepeat);
                var second = GetIndelFromEntry(indels[1], anchorLeft, anchorRight, observationCount, mess, fwdSupport,
                                               reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport, numFromMateUnmapped, numFromUnanchoredRepeat);

                first.InMulti  = true;
                second.InMulti = true;

                // Cross-link the pair; the order of these two assignments is kept as-is.
                first.OtherIndel  = Helper.CandidateToString(second);
                second.OtherIndel = Helper.CandidateToString(first);

                built.Add(first);
                built.Add(second);
            }
            else
            {
                Logger.WriteToLog(
                    $"Can't support more than 2 indels in one read: ignoring multi-indel {keyString} (seen {observationCount} times)");
            }

            // A lone length-1 indel with very few observations is rejected without touching the counter.
            var isLoneLengthOne  = indelCount == 1 && built[0].Length == 1;
            var isBarelyObserved = observationCount < _foundThreshold * 0.8 || observationCount <= 2;
            if (isLoneLengthOne && isBarelyObserved)
            {
                indelMetrics.Outcome = Outcome.SuperWeakSmall;
                return null;
            }

            var rejected = ShouldRemoveVariant(observationCount, avgAnchorLeft, avgAnchorRight, isStrong, statusCounter,
                                               avgQuals, avgMess, anchorLeft, anchorRight, edgeThreshold, indelMetrics);
            if (rejected)
            {
                return null;
            }

            statusCounter.Kept++;
            return built;
        }