/// <summary>
/// Rebuilds a single indel from its hashable key, pools every evidence record collected for it,
/// and re-runs <see cref="ExtractIndelsFromEntry"/> on the pooled evidence.
/// Any indels that call returns are appended to <paramref name="indelsToAdd"/>.
/// </summary>
/// <param name="allowRescue">Forwarded unchanged to <see cref="ExtractIndelsFromEntry"/>.</param>
/// <param name="indelToRecalculate">The hashable indel (key) and the evidence records gathered for it (value).</param>
/// <param name="statusCounter">Counter object forwarded to <see cref="ExtractIndelsFromEntry"/>, which updates it.</param>
/// <param name="edgeThreshold">Forwarded unchanged to <see cref="ExtractIndelsFromEntry"/>.</param>
/// <param name="indelsToAdd">Output list; receives the extracted indels when the extraction result is non-null.</param>
private void RecalculateIndelAndAddIfNeeded(bool allowRescue, KeyValuePair<HashableIndel, List<IndelEvidence>> indelToRecalculate, IndelStatusCounter statusCounter, double edgeThreshold, List<PreIndel> indelsToAdd)
{
    var source = indelToRecalculate.Key;

    // Materialize a PreIndel carrying the same allele description and multi-indel linkage as the key.
    var rebuilt = new PreIndel(new CandidateAllele(source.Chromosome, source.ReferencePosition,
        source.ReferenceAllele, source.AlternateAllele, source.Type))
    {
        InMulti = source.InMulti,
        OtherIndel = source.OtherIndel
    };

    // Fold every evidence record for this indel into one combined record.
    var pooledEvidence = new IndelEvidence();
    foreach (var evidence in indelToRecalculate.Value)
    {
        pooledEvidence.AddIndelEvidence(evidence);
    }

    var entryKey = rebuilt.ToString() + "|" + rebuilt.OtherIndel;
    var extracted = ExtractIndelsFromEntry(pooledEvidence, entryKey, statusCounter, edgeThreshold,
        allowRescue, new List<PreIndel>() { rebuilt });

    if (extracted == null)
    {
        // The pooled evidence was rejected; nothing to contribute.
        return;
    }

    indelsToAdd.AddRange(extracted);
}
/// <summary>
/// Decides whether an indel should be discarded based on its evidence summary, recording the
/// reason on <paramref name="evidence"/> and in <paramref name="statusCounter"/>.
/// </summary>
/// <remarks>
/// Checks run in this order: base thresholds (with optional rescue), poor single observation,
/// poor low-count ("edge") observation. A rescued variant still goes through the later checks,
/// so its Outcome may be overwritten while the Rescued counter stays incremented.
/// </remarks>
/// <param name="observationCount">Number of observations of the indel.</param>
/// <param name="avgAnchorLeft">Average left anchor, compared against <c>_anchorThreshold</c>.</param>
/// <param name="avgAnchorRight">Average right anchor, compared against <c>_anchorThreshold</c>.</param>
/// <param name="isStrong">When true, a variant failing the base thresholds is rescued instead of removed.</param>
/// <param name="statusCounter">Counter whose Rescued/BelowThreshold/PoorSingle/PoorEdge tallies are incremented.</param>
/// <param name="avgQuals">Average quality value.</param>
/// <param name="avgMess">Average mess value, compared against <c>_maxMess</c> and fixed cut-offs.</param>
/// <param name="anchorLeft">Left anchor total; only the smaller of the two totals is inspected.</param>
/// <param name="anchorRight">Right anchor total; only the smaller of the two totals is inspected.</param>
/// <param name="edgeThreshold">Observation count at or below which the stricter quality/mess check applies.</param>
/// <param name="evidence">Evidence record whose <c>Outcome</c> is set when a rule fires.</param>
/// <returns><c>true</c> if the variant should be removed; otherwise <c>false</c>.</returns>
private bool ShouldRemoveVariant(int observationCount, float avgAnchorLeft, float avgAnchorRight, bool isStrong, IndelStatusCounter statusCounter, float avgQuals, float avgMess, int anchorLeft, int anchorRight, double edgeThreshold, IndelEvidence evidence)
{
    var failsBaseThresholds = observationCount < _foundThreshold
                              || avgAnchorLeft < _anchorThreshold
                              || avgAnchorRight < _anchorThreshold
                              || avgMess > _maxMess;

    if (failsBaseThresholds)
    {
        if (!isStrong)
        {
            evidence.Outcome = Outcome.BelowThreshold;
            statusCounter.BelowThreshold++;
            return true;
        }

        // Strong evidence overrides the base thresholds, but the checks below still apply.
        evidence.Outcome = Outcome.Rescued;
        statusCounter.Rescued++;
    }

    // Even if we want to allow single-observation variants to be realigned against,
    // maybe let's avoid the really junky ones.
    var isPoorSingle = observationCount == 1
                       && (Math.Min(anchorLeft, anchorRight) < 5 || avgMess > 1 || avgQuals < 30);
    if (isPoorSingle)
    {
        evidence.Outcome = Outcome.PoorSingle;
        statusCounter.PoorSingle++;
        return true;
    }

    var isPoorEdge = (observationCount <= edgeThreshold) && (avgMess > 2 || avgQuals < 25);
    if (isPoorEdge)
    {
        evidence.Outcome = Outcome.PoorEdge;
        statusCounter.PoorEdge++;
        return true;
    }

    return false;
}
/// <summary>
/// Filters the supplied indel evidence and returns the indels that pass, grouped by chromosome.
/// </summary>
/// <remarks>
/// Entries with too few observations or no reputable support are skipped immediately, unless their
/// position is at or beyond <paramref name="regionEdgeThreshold"/>. Keys that describe more than one
/// indel are split; evidence for each component indel is pooled across all such keys and evaluated
/// once afterwards. Single-indel keys rejected by <see cref="ExtractIndelsFromEntry"/> are removed
/// from <paramref name="indelStringLookup"/>. The <c>Outcome</c> of evidence records is updated as
/// rules fire.
/// </remarks>
/// <param name="indelStringLookup">Evidence keyed by indel key string. Mutated: rejected single-indel keys are removed.</param>
/// <param name="allowRescue">Forwarded to <see cref="ExtractIndelsFromEntry"/>.</param>
/// <param name="regionEdgeThreshold">Entries whose <c>Position</c> is at or past this value bypass the early observation/support filters.</param>
/// <returns>Accepted indels, keyed by <c>Chromosome</c>.</returns>
public Dictionary<string, List<PreIndel>> GetRealignablePreIndels(Dictionary<string, IndelEvidence> indelStringLookup, bool allowRescue, int regionEdgeThreshold = int.MaxValue)
{
    var statusCounter = new IndelStatusCounter();
    var edgeThreshold = Math.Max(_foundThreshold + 1, _foundThreshold * 1.5);
    var indelsToAdd = new List<PreIndel>();
    var multiIndelsToRecalculate = new Dictionary<HashableIndel, List<IndelEvidence>>();
    var indelsToRemove = new List<string>();
    var numImmediatelySkipped = 0;

    // TODO different way of doing this, bc we don't use the indel after that
    var indelsLookup = new Dictionary<string, List<PreIndel>>();

    // Enumerate key/value pairs directly so each entry costs one lookup instead of two.
    // The dictionary itself is not modified inside this loop; removals are deferred to indelsToRemove.
    foreach (var entry in indelStringLookup)
    {
        var key = entry.Key;
        var indelMetrics = entry.Value;
        var keepForNextRegion = indelMetrics.Position >= regionEdgeThreshold;

        if (indelMetrics.Observations == 0 && !keepForNextRegion)
        {
            // Never observed: marked, but deliberately not counted as "immediately skipped".
            indelMetrics.Outcome = Outcome.LowObservations;
            continue;
        }

        if (indelMetrics.Observations < _strictFoundThreshold && !keepForNextRegion)
        {
            indelMetrics.Outcome = Outcome.LowObservations;
            numImmediatelySkipped++;
            continue;
        }

        // No reputable evidence!
        if (indelMetrics.ReputableSupport < 1 && !keepForNextRegion)
        {
            indelMetrics.Outcome = Outcome.LowReputableSupport;
            numImmediatelySkipped++;
            continue;
        }

        var entryIndelKeys = ExtractIndelsFromKeyString(key);
        if (entryIndelKeys == null)
        {
            continue;
        }

        if (entryIndelKeys.Count > 1)
        {
            // Multi-indel key: queue this evidence under every component indel for pooled evaluation later.
            foreach (var entryIndel in entryIndelKeys)
            {
                var multiKey = GetHashableIndel(entryIndel);
                if (!multiIndelsToRecalculate.TryGetValue(multiKey, out var existingIndelMetrics))
                {
                    existingIndelMetrics = new List<IndelEvidence>();
                    multiIndelsToRecalculate[multiKey] = existingIndelMetrics;
                }

                existingIndelMetrics.Add(indelMetrics);
            }
        }
        else
        {
            var entryIndels = ExtractIndelsFromEntry(indelMetrics, key, statusCounter, edgeThreshold, allowRescue, entryIndelKeys);
            if (entryIndels != null)
            {
                indelsToAdd.AddRange(entryIndels);
            }
            else
            {
                indelsToRemove.Add(key);
            }
        }
    }

    foreach (var indelToRecalculate in multiIndelsToRecalculate)
    {
        RecalculateIndelAndAddIfNeeded(allowRescue, indelToRecalculate, statusCounter, edgeThreshold, indelsToAdd);
    }

    // Group the accepted indels by chromosome.
    foreach (var indel in indelsToAdd)
    {
        if (!indelsLookup.TryGetValue(indel.Chromosome, out var indelsForChrom))
        {
            indelsForChrom = new List<PreIndel>();
            indelsLookup.Add(indel.Chromosome, indelsForChrom);
        }

        indelsForChrom.Add(indel);
    }

    // Safe to mutate the input now that enumeration has finished.
    foreach (var badIndel in indelsToRemove)
    {
        indelStringLookup.Remove(badIndel);
    }

    statusCounter.BelowThreshold += numImmediatelySkipped;

    return indelsLookup;
}
/// <summary>
/// Converts one evidence record into zero, one or two annotated indels and decides whether they are kept.
/// </summary>
/// <remarks>
/// Per-observation fractions and averages are derived by dividing the evidence totals by the
/// observation count. With one (or zero) input indels the first element is used; with exactly two,
/// both are built and cross-linked as a multi-indel pair; with more than two, a message is logged
/// and no indel is built.
/// NOTE(review): in the more-than-two case the method still runs the removal checks and, if they
/// pass, increments Kept and returns an empty list — confirm this is intended.
/// </remarks>
/// <param name="indelMetrics">Evidence totals for this entry; its <c>Outcome</c> may be updated.</param>
/// <param name="keyString">Key describing the entry; used in the log message and passed to <c>IsStrong</c>.</param>
/// <param name="statusCounter">Counter updated with the result (Kept, or a rejection tally via <see cref="ShouldRemoveVariant"/>).</param>
/// <param name="edgeThreshold">Forwarded to <see cref="ShouldRemoveVariant"/>.</param>
/// <param name="allowRescue">When false, <c>IsStrong</c> is not consulted and the entry is never treated as strong.</param>
/// <param name="indels">The indel(s) parsed from the entry.</param>
/// <returns>The built indels, or <c>null</c> when the entry is rejected.</returns>
private List<PreIndel> ExtractIndelsFromEntry(IndelEvidence indelMetrics, string keyString, IndelStatusCounter statusCounter, double edgeThreshold, bool allowRescue, List<PreIndel> indels)
{
    var built = new List<PreIndel>();

    var observationCount = indelMetrics.Observations;
    var perObservation = (float)observationCount;

    // Raw totals.
    var anchorLeft = indelMetrics.LeftAnchor;
    var anchorRight = indelMetrics.RightAnchor;
    var mess = indelMetrics.Mess;
    var quals = indelMetrics.Quality;
    var numFromUnanchoredRepeat = indelMetrics.IsRepeat;
    var numFromMateUnmapped = indelMetrics.IsSplit;

    // Support expressed as a fraction of observations.
    var fwdSupport = indelMetrics.Forward / perObservation;
    var reverseSupport = indelMetrics.Reverse / perObservation;
    var stitchedSupport = indelMetrics.Stitched / perObservation;
    var reputableSupportFraction = indelMetrics.ReputableSupport / perObservation;

    // Per-observation averages.
    var avgAnchorLeft = anchorLeft / perObservation;
    var avgAnchorRight = anchorRight / perObservation;
    var avgQuals = quals / perObservation;
    var avgMess = mess / perObservation;

    // TODO clean this up, no more magic
    var isStrong = allowRescue && IsStrong(avgQuals, reputableSupportFraction, avgAnchorLeft, avgMess,
        avgAnchorRight, reverseSupport, observationCount, fwdSupport, keyString, stitchedSupport);

    if (indels.Count <= 1)
    {
        var single = GetIndelFromEntry(indels[0], anchorLeft, anchorRight, observationCount, mess,
            fwdSupport, reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport,
            numFromMateUnmapped, numFromUnanchoredRepeat);
        built.Add(single);
    }
    else if (indels.Count == 2)
    {
        var first = GetIndelFromEntry(indels[0], anchorLeft, anchorRight, observationCount, mess,
            fwdSupport, reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport,
            numFromMateUnmapped, numFromUnanchoredRepeat);
        var second = GetIndelFromEntry(indels[1], anchorLeft, anchorRight, observationCount, mess,
            fwdSupport, reverseSupport, reputableSupportFraction, avgQuals, stitchedSupport,
            numFromMateUnmapped, numFromUnanchoredRepeat);

        // Cross-link the pair so each member records its partner.
        first.InMulti = true;
        second.InMulti = true;
        first.OtherIndel = Helper.CandidateToString(second);
        second.OtherIndel = Helper.CandidateToString(first);

        built.Add(first);
        built.Add(second);
    }
    else
    {
        Logger.WriteToLog(
            $"Can't support more than 2 indels in one read: ignoring multi-indel {keyString} (seen {observationCount} times)");
    }

    // A lone length-1 indel with very few observations is dropped outright.
    var isSuperWeakSmall = indels.Count == 1
                           && built[0].Length == 1
                           && (observationCount < _foundThreshold * 0.8 || observationCount <= 2);
    if (isSuperWeakSmall)
    {
        indelMetrics.Outcome = Outcome.SuperWeakSmall;
        return null;
    }

    var rejected = ShouldRemoveVariant(observationCount, avgAnchorLeft, avgAnchorRight, isStrong,
        statusCounter, avgQuals, avgMess, anchorLeft, anchorRight, edgeThreshold, indelMetrics);
    if (rejected)
    {
        return null;
    }

    statusCounter.Kept++;
    return built;
}