/// <summary>
/// Walks every base of <paramref name="alignment"/> and records one allele count per reference
/// position the read touches: the observed base (or N when its quality is below
/// <c>_minBasecallQuality</c>), a Deletion for every reference position skipped between two
/// mapped bases, and a Deletion for each position of a deletion that closes the CIGAR
/// (optionally followed by a soft clip).
/// </summary>
/// <param name="alignment">The read whose bases are tallied.</param>
private void AddAlleleCounts(Read alignment)
{
    var cigar = alignment.CigarData;
    var closesWithDeletion = cigar.HasOperationAtOpIndex(1, 'D', true);
    var closesWithDeletionThenSoftclip = cigar.HasOperationAtOpIndex(2, 'D', true)
                                         && cigar.HasOperationAtOpIndex(1, 'S', true);

    var terminalDeletionLength = 0;
    var readIndexAfterDeletion = alignment.ReadLength;
    if (closesWithDeletionThenSoftclip)
    {
        // Deletion is the second-to-last op; the last op (soft clip) is excluded from the read length.
        terminalDeletionLength = (int)cigar[cigar.Count - 2].Length;
        readIndexAfterDeletion = (int)(alignment.ReadLength - cigar[cigar.Count - 1].Length);
    }
    else if (closesWithDeletion)
    {
        terminalDeletionLength = (int)cigar[cigar.Count - 1].Length;
    }

    var previousPosition = alignment.Position - 1;
    for (var readIndex = 0; readIndex < alignment.PositionMap.Length; readIndex++)
    {
        if (closesWithDeletionThenSoftclip && readIndex == readIndexAfterDeletion)
        {
            // Reached the trailing soft clip: emit the deletion that sits just before it.
            for (var offset = 1; offset <= terminalDeletionLength; offset++)
            {
                AddAlleleCount(previousPosition + offset, AlleleType.Deletion, alignment.DirectionMap[readIndex]);
            }
        }

        var position = alignment.PositionMap[readIndex];
        if (position == -1)
        {
            // This base has no reference coordinate.
            continue;
        }

        // Any reference positions jumped over since the previous mapped base are deletions.
        for (var skipped = previousPosition + 1; skipped < position; skipped++)
        {
            AddAlleleCount(skipped, AlleleType.Deletion, alignment.DirectionMap[readIndex]);
        }

        var calledType = AlleleHelper.GetAlleleType(alignment.Sequence[readIndex]);
        var recordedType = alignment.Qualities[readIndex] < _minBasecallQuality
            ? AlleleType.N // low quality bases are recorded as no calls
            : calledType;

        AddAlleleCount(position, recordedType, alignment.DirectionMap[readIndex]);
        previousPosition = position;
    }

    if (closesWithDeletion)
    {
        // The deletion is the very last op: attribute it to the direction of the final base.
        for (var offset = 1; offset <= terminalDeletionLength; offset++)
        {
            AddAlleleCount(previousPosition + offset, AlleleType.Deletion,
                alignment.DirectionMap[alignment.DirectionMap.Length - 1]);
        }
    }
}
/// <summary>
/// Checks the string-to-allele-type mapping: the four canonical bases map to themselves,
/// "N" maps to N, and an unrecognised letter ("U") also maps to N.
/// </summary>
public void GetAlleleType()
{
    string[] inputs = { "A", "G", "C", "T", "N", "U" };
    AlleleType[] expected =
    {
        AlleleType.A, AlleleType.G, AlleleType.C, AlleleType.T, AlleleType.N, AlleleType.N
    };

    for (var index = 0; index < inputs.Length; index++)
    {
        Assert.Equal(expected[index], AlleleHelper.GetAlleleType(inputs[index]));
    }
}
/// <summary>
/// Computes coverage statistics for an allele that occupies a single reference position.
/// For every read direction it accumulates coverage and summed base quality over all
/// coverage-contributing allele types, counts reference support and no calls, and finally
/// removes the reference counts already claimed by gapped MNVs.
/// </summary>
/// <param name="allele">The called allele to update in place.</param>
/// <param name="alleleCountSource">Source of per-position allele counts and base qualities.</param>
protected virtual void CalculateSinglePoint(CalledAllele allele, IAlleleSource alleleCountSource)
{
    //TODO: Is there a reason why we don't reallocate the stitched coverage here for point mutations? (as we do with spanning ones)
    var position = allele.ReferencePosition;

    for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
    {
        var direction = (DirectionType)directionIndex;

        // Sum every observation at this position for the current direction.
        foreach (var contributingType in Constants.CoverageContributingAlleles)
        {
            allele.EstimatedCoverageByDirection[directionIndex] +=
                alleleCountSource.GetAlleleCount(position, contributingType, direction);
            allele.SumOfBaseQuality +=
                alleleCountSource.GetSumOfAlleleBaseQualities(position, contributingType, direction);

            if (contributingType == AlleleHelper.GetAlleleType(allele.ReferenceAllele))
            {
                allele.ReferenceSupport +=
                    alleleCountSource.GetAlleleCount(position, contributingType, direction);
            }
        }

        var coverageInDirection = allele.EstimatedCoverageByDirection[directionIndex];
        allele.TotalCoverage += coverageInDirection;

        // Single point variants: for now all coverage is treated as confident coverage.
        allele.ConfidentCoverageStart += coverageInDirection;
        allele.ConfidentCoverageEnd += coverageInDirection;

        allele.NumNoCalls += alleleCountSource.GetAlleleCount(position, AlleleType.N, direction);
    }

    // Remove reference counts that gapped MNVs have already taken up.
    // The amount taken by a gapped MNV can exceed the depth at this reference position: with
    // collapsing enabled, low quality (or N) gapped ref positions are collapsed into the MNV and
    // count towards its support, yet they were never added to the region's allele counts.
    // Collapsing is the right thing to do, so the result is simply floored at 0.
    var refCountsTakenByGappedMnvs = alleleCountSource.GetGappedMnvRefCount(position);
    if (allele.Type == AlleleCategory.Snv)
    {
        allele.ReferenceSupport = Math.Max(0, allele.ReferenceSupport - refCountsTakenByGappedMnvs);
    }
    else if (allele.Type == AlleleCategory.Reference)
    {
        allele.AlleleSupport = Math.Max(0, allele.AlleleSupport - refCountsTakenByGappedMnvs);
    }
}
/// <summary>
/// Computes coverage statistics for an allele that occupies a single reference coordinate.
/// For every read direction it accumulates total coverage over all coverage-contributing
/// allele types, counts reference support (for variants) and no calls, then removes the
/// reference counts already claimed by gapped MNVs.
/// </summary>
/// <param name="allele">The called allele to update in place.</param>
/// <param name="alleleCountSource">Source of per-position allele counts.</param>
private static void CalculateSinglePoint(BaseCalledAllele allele, IStateManager alleleCountSource)
{
    var variant = allele as CalledVariant;

    // The reference allele does not change while counting, so resolve its type once.
    var referenceType = AlleleHelper.GetAlleleType(allele.Reference);

    for (var direction = 0; direction < Constants.NumDirectionTypes; direction++)
    {
        foreach (var alleleType in Constants.CoverageContributingAlleles)
        {
            allele.TotalCoverageByDirection[direction] +=
                alleleCountSource.GetAlleleCount(allele.Coordinate, alleleType, (DirectionType)direction);

            if (alleleType != referenceType)
            {
                continue;
            }

            if (variant != null)
            {
                variant.ReferenceSupport +=
                    alleleCountSource.GetAlleleCount(variant.Coordinate, alleleType, (DirectionType)direction);
            }
        }

        allele.TotalCoverage += allele.TotalCoverageByDirection[direction];
        allele.NumNoCalls += alleleCountSource.GetAlleleCount(allele.Coordinate, AlleleType.N, (DirectionType)direction);
    }

    // Adjust for reference counts already taken up by gapped mnvs.
    // The count claimed by gapped mnvs can exceed what was tallied at this position (e.g. bases
    // that were folded into an mnv but never counted here), so floor the result at 0 rather than
    // reporting negative support.
    var gappedRefCounts = alleleCountSource.GetGappedMnvRefCount(allele.Coordinate);
    if (allele.Type == AlleleCategory.Snv && variant != null)
    {
        variant.ReferenceSupport = System.Math.Max(0, variant.ReferenceSupport - gappedRefCounts);
    }
    else if (allele.Type == AlleleCategory.Reference)
    {
        allele.AlleleSupport = System.Math.Max(0, allele.AlleleSupport - gappedRefCounts);
    }
}
/// <summary>
/// Records the allele observations contributed by one read: for each base mapped to the
/// reference it adds an allele count (N when the base quality is below
/// <c>_minBasecallQuality</c>), a base-quality contribution, and, for non-N calls, collapsed
/// read and amplicon counts. Reference positions skipped between mapped bases, and the
/// positions of a deletion that closes the CIGAR, are recorded as Deletion when
/// <c>CandidateVariantFinder.CheckDeletionQuality</c> passes. Optionally stores the read's
/// coverage summary.
/// </summary>
/// <param name="alignment">The read to tally.</param>
public void AddAlleleCounts(Read alignment)
{
    // Remember the length of the first read seen; later reads do not overwrite it.
    if (!_readLength.HasValue)
    {
        _readLength = alignment.ReadLength;
    }

    // Start one position before the read so the gap loop below adds nothing for the first base.
    var lastPosition = alignment.Position - 1;
    var cigarData = alignment.CigarData;
    var deletionLength = 0;
    var lengthBeforeDeletion = alignment.ReadLength;

    // NOTE(review): the third argument presumably means "index counted from the end" - confirm
    // against HasOperationAtOpIndex. Under that reading these detect a CIGAR ending in D, or in D then S.
    var endsInDeletion = cigarData.HasOperationAtOpIndex(0, 'D', true);
    var endsInDeletionBeforeSoftclip = cigarData.HasOperationAtOpIndex(1, 'D', true) && cigarData.HasOperationAtOpIndex(0, 'S', true);
    if (endsInDeletion || endsInDeletionBeforeSoftclip)
    {
        // Deletion length comes from the last op, or the second-to-last when a soft clip follows it.
        deletionLength = (int)(endsInDeletionBeforeSoftclip ? cigarData[cigarData.Count - 2].Length : cigarData[cigarData.Count - 1].Length);
        // With a trailing soft clip, this is the read index where the final op begins.
        lengthBeforeDeletion = (int)(endsInDeletionBeforeSoftclip ? alignment.ReadLength - cigarData[cigarData.Count - 1].Length : alignment.ReadLength);
    }

    var positionMapLength = alignment.PositionMap.Length;
    var alignmentEndPosition = alignment.EndPosition;
    var alignmentStartPosition = alignment.Position;
    for (var positionMapIndex = 0; positionMapIndex < positionMapLength; positionMapIndex++)
    {
        DirectionType directionType = alignment.SequencedBaseDirectionMap[positionMapIndex];

        // At the first index of the trailing soft clip, emit the deletion that precedes it.
        if ((endsInDeletionBeforeSoftclip) && positionMapIndex == lengthBeforeDeletion)
        {
            if (CandidateVariantFinder.CheckDeletionQuality(alignment, positionMapIndex, _minBasecallQuality))
            {
                for (var j = 1; j < deletionLength + 1; j++) // add any terminal deletion counts
                {
                    var anchorIndex = NumAnchorIndexes - 1; // Last index
                    AddAlleleCount(j + lastPosition, AlleleType.Deletion, directionType, anchorIndex);
                    AddCollapsedReadCount(j + lastPosition, alignment, directionType);
                }
            }
        }

        var position = alignment.PositionMap.GetPositionAtIndex(positionMapIndex);
        if (position == -1)
        {
            continue; // not mapped to reference
        }

        var anchorType = GetAnchorType(alignmentEndPosition, position, alignmentStartPosition);

        // if the deletion is of decent quality, add it to the counts matrix
        if (CandidateVariantFinder.CheckDeletionQuality(alignment, positionMapIndex, _minBasecallQuality))
        {
            // Every reference position skipped since the previous mapped base is a deletion.
            for (var j = lastPosition + 1; j < position; j++) // add any deletion counts
            {
                AddAlleleCount(j, AlleleType.Deletion, directionType, anchorType);
                AddCollapsedReadCount(j, alignment, directionType);
            }
        }

        var alleleType = AlleleHelper.GetAlleleType(alignment.Sequence[positionMapIndex]);
        if (alignment.Qualities[positionMapIndex] < _minBasecallQuality)
        {
            alleleType = AlleleType.N; // record this event as a no call
        }

        AddAlleleCount(position, alleleType, directionType, anchorType);

        // No calls contribute to the allele counts but not to collapsed-read or amplicon counts.
        if (alleleType != AlleleType.N)
        {
            AddCollapsedReadCount(position, alignment, directionType);
            AddAmpliconCount(_trackAmpliconCounts, position, alignment.GetAmpliconNameIfExists());
        }

        // The base quality Q is passed on as 10^(-Q/10).
        AddAlleleBaseQuality(position, alleleType, directionType, Math.Pow(10, -1 * (int)alignment.Qualities[positionMapIndex] / 10f), anchorType);
        lastPosition = position;
    }

    // CIGAR ends directly in a deletion: attribute it to the direction of the final base.
    if (endsInDeletion)
    {
        if (CandidateVariantFinder.CheckDeletionQuality(alignment, alignment.SequencedBaseDirectionMap.Length - 1, _minBasecallQuality))
        {
            for (var j = 1; j < deletionLength + 1; j++) // add any terminal deletion counts
            {
                DirectionType directionType = alignment.SequencedBaseDirectionMap[alignment.SequencedBaseDirectionMap.Length - 1];
                var anchorIndex = NumAnchorIndexes - 1; // Last index
                AddAlleleCount(j + lastPosition, AlleleType.Deletion, directionType, anchorIndex); // ends in deletion -> within 1
                AddCollapsedReadCount(j + lastPosition, alignment, directionType);
            }
        }
    }

    // add coverage summary
    if (_trackReadSummaries)
    {
        var coverageSummary = alignment.GetCoverageSummary();
        var block = GetBlock(coverageSummary.ClipAdjustedEndPosition); // store by end position so we can always be forward looking
        block.AddReadSummary(coverageSummary.ClipAdjustedEndPosition, coverageSummary);
    }
}
// TODO revisit using anchor info to make deletion coverage more accurate, when we have the time...
//private void CalculateDeletionCoverage(CalledAllele variant, IAlleleSource alleleCountSource,
//    int startPointPosition, int endPointPosition, bool presumeAnchoredForExactCov = true)
//{
//    var startPointCoverage = new[] { 0, 0, 0 };
//    var endPointCoverage = new[] { 0, 0, 0 };
//    var exactTotalCoverage = 0f;
//    var variantLength = variant.Length;
//    for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
//    {
//        var minAnchor = variantLength + 1;
//        var startPointCoverageForDirectionTotal = 0;
//        var endPointCoverageForDirectionTotal = 0;
//        foreach (var alleleType in Constants.CoverageContributingAlleles)
//        {
//            var startPointCoverageForDirection = alleleCountSource.GetAlleleCount(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, symmetric: true);
//            var endPointCoverageForDirection = alleleCountSource.GetAlleleCount(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, fromEnd: true, symmetric: true);
//            startPointCoverageForDirectionTotal += startPointCoverageForDirection;
//            endPointCoverageForDirectionTotal += endPointCoverageForDirection;
//            startPointCoverage[directionIndex] += startPointCoverageForDirection;
//            endPointCoverage[directionIndex] += endPointCoverageForDirection;
//            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchor);
//            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, fromEnd: true);
//        }
//    }
//    // coverage by strand direction is used for strand bias. need to redistribute stitched contribution to forward and reverse directions for book-ends before reconciling them.
//    RedistributeStitchedCoverage(startPointCoverage);
//    RedistributeStitchedCoverage(endPointCoverage);
//    // intentionally leave stitched coverage empty when calculating for a spanned variant (it's already been redistributed)
//    for (var directionIndex = 0; directionIndex < 2; directionIndex++)
//    {
//        var exactCoverageForDir = presumeAnchoredForExactCov ? ((startPointCoverage[directionIndex] + endPointCoverage[directionIndex])) / 2f : //will always round to lower.
//            Math.Min(startPointCoverage[directionIndex], endPointCoverage[directionIndex]);
//        variant.EstimatedCoverageByDirection[directionIndex] = (int)exactCoverageForDir;
//        exactTotalCoverage += exactCoverageForDir;
//    }
//    //for extended variants, coverage is not an exact value.
//    //Its an estimate based on the depth over the length of the variant.
//    //In particular, the depth by direction does not always allocate neatly to an integer value.
//    //ie, variant.TotalCoverage != variant.EstimatedCoverageByDirection[directionIndex].Sum
//    variant.TotalCoverage = (int)exactTotalCoverage;
//    variant.ReferenceSupport = Math.Max(0, variant.TotalCoverage - variant.AlleleSupport);
//}

/// <summary>
/// Calculation for spanning variants requires looking at two datapoints and reconciling the coverage between the two.
/// For insertions, take min of preceding and trailing datapoints.
/// For deletions and mnvs, take average of first and last datapoint for variant.
/// jg todo - figure out this old comment - (Or if we're at the edge of the world, give up and just take the coverage of the left base)
/// </summary>
/// <param name="variant">The spanning variant whose coverage, reference support and base-quality sum are updated in place.</param>
/// <param name="alleleCountSource">Source of per-position allele counts and summed base qualities.</param>
/// <param name="startPointPosition">Reference position of the first datapoint.</param>
/// <param name="endPointPosition">Reference position of the second datapoint.</param>
/// <param name="presumeAnchoredForExactCov">
/// When true the per-direction coverage is the average of the two datapoints; when false it is their minimum.
/// </param>
protected virtual void CalculateSpanning(CalledAllele variant, IAlleleSource alleleCountSource, int startPointPosition, int endPointPosition, bool presumeAnchoredForExactCov = true)
{
    // TODO come back to this - now that we are tracking coverage more tightly we may be able to improve deletion spanning read count estimates as well
    //if (variant.Type == AlleleCategory.Deletion)
    //{
    //    CalculateDeletionCoverage(variant, alleleCountSource, startPointPosition, endPointPosition,
    //        presumeAnchoredForExactCov);
    //    return;
    //}

    // empty arrays to do our coverage calculations. the three spaces are for each read direction.
    var startPointCoverage = new[] { 0, 0, 0 };
    var endPointCoverage = new[] { 0, 0, 0 };
    var exactTotalCoverage = 0f;
    var confidentCoverageLeft = 0;
    var confidentCoverageRight = 0;
    var suspiciousCoverageLeft = 0;
    var suspiciousCoverageRight = 0;
    var firstBase = AlleleType.N;
    var lastBase = AlleleType.N;

    // Anchor-aware counting only applies to insertions, and only when enabled.
    var bePickyAboutAnchors = _considerAnchorInformation && variant.Type == AlleleCategory.Insertion;
    if (bePickyAboutAnchors)
    {
        // Index 1 is the base following the first character of the alternate allele.
        var firstBaseChar = variant.AlternateAllele[1];
        firstBase = AlleleHelper.GetAlleleType(firstBaseChar);
        var lastBaseChar = variant.AlternateAllele[variant.AlternateAllele.Length - 1];
        lastBase = AlleleHelper.GetAlleleType(lastBaseChar);
    }

    var startPointCoverageUnanchored = new[] { 0, 0, 0 };
    var endPointCoverageUnanchored = new[] { 0, 0, 0 };
    var unanchoredCoverageStartQuality = 0D;
    var unanchoredCoverageEndQuality = 0D;
    var unanchoredSupport = variant.AlleleSupport - variant.WellAnchoredSupport;

    // Track the relative coverages of each and then go back and use this to determine the weighting factor
    for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
    {
        var direction = (DirectionType)directionIndex;
        foreach (var alleleType in Constants.CoverageContributingAlleles)
        {
            // Coverage whose base matches the insertion's first/last base is only trusted when
            // anchored by at least the variant length on the relevant side.
            var anchoredCoverageOnlyEnd = bePickyAboutAnchors && alleleType == firstBase;
            var anchoredCoverageOnlyStart = bePickyAboutAnchors && alleleType == lastBase;
            var minAnchorEnd = anchoredCoverageOnlyEnd ? variant.Length : 0;
            var minAnchorStart = anchoredCoverageOnlyStart ? variant.Length : 0;

            var startPointCoverageForDirection = alleleCountSource.GetAlleleCount(startPointPosition, alleleType, direction, minAnchorStart);
            startPointCoverage[directionIndex] += startPointCoverageForDirection;

            var endPointCoverageForDirection = alleleCountSource.GetAlleleCount(endPointPosition, alleleType, direction, minAnchorEnd, fromEnd: true);
            endPointCoverage[directionIndex] += endPointCoverageForDirection;

            confidentCoverageLeft += startPointCoverageForDirection;
            confidentCoverageRight += endPointCoverageForDirection;

            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, direction, minAnchorStart);
            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, direction, minAnchorEnd, fromEnd: true);

            // Shortcut - if the unanchored support is 0 anyway, we're going to use 0 as our weight here and there's no point collecting this info
            if (bePickyAboutAnchors && unanchoredSupport > 0)
            {
                if (minAnchorStart > 0)
                {
                    var unanchoredCoverageStartCount = alleleCountSource.GetAlleleCount(startPointPosition, alleleType, direction, 0, maxAnchor: minAnchorStart - 1);
                    startPointCoverageUnanchored[directionIndex] += unanchoredCoverageStartCount;
                    suspiciousCoverageLeft += unanchoredCoverageStartCount;

                    // Need to adjust the windowed base qualities as well
                    unanchoredCoverageStartQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, direction, 0, maxAnchor: minAnchorStart - 1);
                }

                if (minAnchorEnd > 0)
                {
                    var unanchoredCoverageEndCount = alleleCountSource.GetAlleleCount(endPointPosition, alleleType, direction, 0, fromEnd: true, maxAnchor: minAnchorEnd - 1);
                    endPointCoverageUnanchored[directionIndex] += unanchoredCoverageEndCount;
                    suspiciousCoverageRight += unanchoredCoverageEndCount;

                    // Need to adjust the windowed base qualities as well. The quality must be read
                    // at the same (end) position as the count it accompanies.
                    unanchoredCoverageEndQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, direction, 0, fromEnd: true, maxAnchor: minAnchorEnd - 1);
                }
            }
        }
    }

    if (bePickyAboutAnchors)
    {
        var trulyAnchoredCoverage = (((confidentCoverageLeft - suspiciousCoverageRight) + (confidentCoverageRight - suspiciousCoverageLeft)) / 2f);
        var anchoredVariantFreq = trulyAnchoredCoverage <= 0 ? 0 : variant.WellAnchoredSupport / trulyAnchoredCoverage;

        // Suspicious coverages are not likely to be from the same sources, so add rather than average
        var totalSuspiciousCoverage = suspiciousCoverageLeft + suspiciousCoverageRight;
        var unanchoredVariantFreq = totalSuspiciousCoverage == 0 ? 0 : unanchoredSupport / ((float)totalSuspiciousCoverage);

        // Weight in [0, 1]: how much of the unanchored coverage to count back in.
        var variantSpecificUnanchoredWeight = Math.Max(0, anchoredVariantFreq == 0 ? 1 : Math.Min(1, unanchoredVariantFreq / anchoredVariantFreq));
        variant.UnanchoredCoverageWeight = variantSpecificUnanchoredWeight;

        for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
        {
            startPointCoverage[directionIndex] += (int)(startPointCoverageUnanchored[directionIndex] * variantSpecificUnanchoredWeight);
            endPointCoverage[directionIndex] += (int)(endPointCoverageUnanchored[directionIndex] * variantSpecificUnanchoredWeight);

            // GB: this will keep us consistent with how we were doing it before, but I find it rather odd that we're ADDING base quality from both sides and ultimately in ProcessVariant dividing that sum by the total coverage which is an average, not a sum, of each side's coverage.
            // Since we are dividing the total q score by the tot cov i didn't want it to get inflated by reducing the tot cov, so adjusted by the same factor
            // TJD response: Base quality is a log of a p value, so averaging them is not the same as summing them then dividing. If you have a bunch of Qscores, say 10 and 10 and 100, you DO NOT do (10+ 10+100)/3. You have to do Q10-> p 0.1 and 100 -> p 0.01 so avg(0.1,0.1,0.01) is ~ .2/3 = 0.0666 -> a Q of (what ever that ends up being)... just a computational trick, to do it in log space instead of normal space
            // NOTE(review): the two quality totals below are already summed over all directions, yet
            // they are added once per direction here - looks like an overcount; confirm intent before changing.
            variant.SumOfBaseQuality += unanchoredCoverageStartQuality * variantSpecificUnanchoredWeight;
            variant.SumOfBaseQuality += unanchoredCoverageEndQuality * variantSpecificUnanchoredWeight;
        }
    }

    // coverage by strand direction is used for strand bias. need to redistribute stitched contribution to forward and reverse directions for book-ends before reconciling them.
    RedistributeStitchedCoverage(startPointCoverage);
    RedistributeStitchedCoverage(endPointCoverage);

    // intentionally leave stitched coverage empty when calculating for a spanned variant (it's already been redistributed)
    for (var directionIndex = 0; directionIndex < 2; directionIndex++)
    {
        var exactCoverageForDir = presumeAnchoredForExactCov
            ? ((startPointCoverage[directionIndex] + endPointCoverage[directionIndex])) / 2f // will always round to lower.
            : Math.Min(startPointCoverage[directionIndex], endPointCoverage[directionIndex]);
        variant.EstimatedCoverageByDirection[directionIndex] = (int)exactCoverageForDir;
        exactTotalCoverage += exactCoverageForDir;
    }

    // for extended variants, coverage is not an exact value.
    // It's an estimate based on the depth over the length of the variant.
    // In particular, the depth by direction does not always allocate neatly to an integer value.
    // ie, variant.TotalCoverage != variant.EstimatedCoverageByDirection[directionIndex].Sum
    variant.TotalCoverage = (int)exactTotalCoverage;
    variant.ReferenceSupport = Math.Max(0, variant.TotalCoverage - variant.AlleleSupport);
    variant.SuspiciousCoverageStart = suspiciousCoverageLeft;
    variant.ConfidentCoverageStart = confidentCoverageLeft;
    variant.SuspiciousCoverageEnd = suspiciousCoverageRight;
    variant.ConfidentCoverageEnd = confidentCoverageRight;
}
/// <summary>
/// Collects every stored candidate variant and, when reference alleles are requested or forced
/// genotyping alleles are supplied, one reference candidate per position in the regions of
/// interest. A reference candidate is kept when an interval set is in use or the position has
/// any support at all.
/// </summary>
/// <param name="includeRefAlleles">When true, reference alleles are gathered over <paramref name="intervals"/> (or the whole block if null).</param>
/// <param name="chrReference">Reference name and sequence used to build the reference candidates.</param>
/// <param name="intervals">Optional intervals limiting where reference alleles are gathered.</param>
/// <param name="forcesGtAlleles">Optional forced alleles; when reference alleles are not requested, intervals are derived from these.</param>
/// <returns>The combined list of candidate alleles.</returns>
public List<CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference, ChrIntervalSet intervals = null, HashSet<Tuple<string, int, string, string>> forcesGtAlleles = null)
{
    var candidates = new List<CandidateAllele>();

    // add all candidates - these are potentially collapsable targets
    foreach (var candidatesAtPosition in _candidateVariantsLookup)
    {
        if (candidatesAtPosition == null)
        {
            continue;
        }

        candidates.AddRange(candidatesAtPosition);
    }

    var intervalsInUse = includeRefAlleles
        ? intervals
        : CreateIntervalsFromAllels(chrReference, forcesGtAlleles);

    var hasForcedAlleles = forcesGtAlleles != null && forcesGtAlleles.Count != 0;
    if (!includeRefAlleles && !hasForcedAlleles)
    {
        return candidates;
    }

    var regionsToFetch = intervalsInUse == null
        ? new List<Region> { this }         // fetch whole block region
        : intervalsInUse.GetClipped(this);  // clip intervals to block region

    foreach (var region in regionsToFetch)
    {
        for (var position = region.StartPosition; position <= region.EndPosition; position++)
        {
            // Stop once the region runs past the end of the reference sequence.
            if (position > chrReference.Sequence.Length)
            {
                break;
            }

            var positionIndex = position - StartPosition;

            // add ref alleles within region to fetch - note that zero coverage ref positions are only added if input intervals provided
            var refBase = chrReference.Sequence[position - 1].ToString();
            var refBaseIndex = (int)AlleleHelper.GetAlleleType(refBase);
            var refCandidate = new CandidateAllele(chrReference.Name, position, refBase, refBase, AlleleCategory.Reference);

            // gather support for allele
            var supportAtPosition = 0;
            for (var alleleTypeIndex = 0; alleleTypeIndex < Constants.NumAlleleTypes; alleleTypeIndex++)
            {
                for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                {
                    // Fold the anchor dimension away: total count for this allele type and direction.
                    var countAcrossAnchors = 0;
                    for (var anchorIndex = 0; anchorIndex < NumAnchorIndexes; anchorIndex++)
                    {
                        countAcrossAnchors += _alleleCounts[positionIndex, alleleTypeIndex, directionIndex, anchorIndex];
                    }

                    if (alleleTypeIndex == refBaseIndex)
                    {
                        refCandidate.SupportByDirection[directionIndex] = countAcrossAnchors;
                        // TODO this isn't really proven to be well-anchored, nor is it proven not to be
                        //refAllele.WellAnchoredSupportByDirection[directionIndex] = count;
                    }

                    supportAtPosition += countAcrossAnchors;
                }
            }

            if (intervalsInUse != null || supportAtPosition > 0)
            {
                candidates.Add(refCandidate);
            }
        }
    }

    return candidates;
}
/// <summary>
/// Scans one alignment operation base by base and turns mismatching runs into candidate
/// SNV/MNV alleles. A run is extended while <c>ShouldBuildUpMNV</c> allows it; otherwise the
/// run accumulated so far is handed to <c>FlushVariant</c>. Bases that are N (in read or
/// reference) or below the minimum base call quality always end the current run.
/// </summary>
/// <param name="alignment">The read being scanned.</param>
/// <param name="refChromosome">Reference sequence the operation is compared against.</param>
/// <param name="opStartIndexInRead">Index in the read where the operation starts.</param>
/// <param name="operationLength">Number of bases in the operation.</param>
/// <param name="opStartIndexInReference">Index in the reference where the operation starts.</param>
/// <param name="chromosomeName">Chromosome name passed on to flushed candidates.</param>
/// <returns>The candidates collected from this operation.</returns>
private IEnumerable<CandidateAllele> ExtractSnvsFromOperation(Read alignment, string refChromosome, int opStartIndexInRead, uint operationLength, int opStartIndexInReference, string chromosomeName)
{
    var collected = new List<CandidateAllele>();
    var runLength = 0;
    var trailingRefMatches = 0;

    for (var offset = 0; offset < operationLength; offset++)
    {
        var readIndex = opStartIndexInRead + offset;
        var refIndex = opStartIndexInReference + offset;

        var passesQuality = alignment.Qualities[readIndex] >= _minimumBaseCallQuality;
        var readBase = alignment.Sequence[readIndex];
        if (refIndex >= refChromosome.Length)
        {
            // Ran off the end of the reference; the final flush below handles what we have.
            break;
        }

        var refBase = refChromosome[refIndex];
        var onLastBase = offset == (operationLength - 1);
        var wouldStartRunOnLastBase = onLastBase && runLength == 0;

        // Decide whether the current run grows; if not, what length the next run starts with.
        var extendRun = false;
        var basesAgree = false;
        var runLengthAfterFlush = 0;

        // Do not create/extend a variant if the quality isn't good enough or the allele or ref is an N
        var unusableBase = (AlleleHelper.GetAlleleType(readBase) == AlleleType.N)
                           || (AlleleHelper.GetAlleleType(refBase) == AlleleType.N)
                           || !passesQuality;
        if (!unusableBase)
        {
            basesAgree = BasesMatch(refBase, readBase);

            // Don't build up an MNV if we're on the last base of operation
            extendRun = ShouldBuildUpMNV(runLength, trailingRefMatches, basesAgree) && !wouldStartRunOnLastBase;

            // A mismatch that cannot extend the run becomes the first base of a new one.
            runLengthAfterFlush = basesAgree ? 0 : 1;
        }

        if (extendRun)
        {
            runLength++;
            trailingRefMatches = basesAgree ? trailingRefMatches + 1 : 0;
            continue;
        }

        FlushVariant(alignment, refChromosome, readIndex - runLength, refIndex - runLength,
            chromosomeName, runLength, trailingRefMatches, collected);
        runLength = runLengthAfterFlush;
        trailingRefMatches = 0;
    }

    // Flush if we've gotten to the end
    FlushVariant(alignment, refChromosome,
        opStartIndexInRead + ((int)operationLength) - runLength,
        opStartIndexInReference + ((int)operationLength) - runLength,
        chromosomeName, runLength, trailingRefMatches, collected);

    return collected;
}
/// <summary>
/// Collects every stored candidate variant and, when requested, one reference candidate per
/// position in the regions of interest. A reference candidate is kept when intervals were
/// supplied or the position has any support at all.
/// </summary>
/// <param name="includeRefAlleles">When false, only the stored candidate variants are returned.</param>
/// <param name="chrReference">Reference name and sequence used to build the reference candidates.</param>
/// <param name="intervals">Optional intervals limiting where reference alleles are gathered; null means the whole block.</param>
/// <returns>The combined list of candidate alleles.</returns>
public List<CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference, ChrIntervalSet intervals = null)
{
    var candidates = new List<CandidateAllele>();

    // add all candidates - these are potentially collapsable targets
    foreach (var candidatesAtPosition in _candidateVariantsLookup)
    {
        if (candidatesAtPosition == null)
        {
            continue;
        }

        candidates.AddRange(candidatesAtPosition);
    }

    if (!includeRefAlleles)
    {
        return candidates;
    }

    var regionsToFetch = intervals == null
        ? new List<Region> { this }    // fetch whole block region
        : intervals.GetClipped(this);  // clip intervals to block region

    foreach (var region in regionsToFetch)
    {
        for (var position = region.StartPosition; position <= region.EndPosition; position++)
        {
            // Stop once the region runs past the end of the reference sequence.
            if (position > chrReference.Sequence.Length)
            {
                break;
            }

            var positionIndex = position - StartPosition;

            // add ref alleles within region to fetch - note that zero coverage ref positions are only added if input intervals provided
            var refBase = chrReference.Sequence[position - 1].ToString();
            var refBaseIndex = (int)AlleleHelper.GetAlleleType(refBase);
            var refCandidate = new CandidateAllele(chrReference.Name, position, refBase, refBase, AlleleCategory.Reference);

            // gather support for allele
            var supportAtPosition = 0;
            for (var alleleTypeIndex = 0; alleleTypeIndex < Constants.NumAlleleTypes; alleleTypeIndex++)
            {
                for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                {
                    var observed = _alleleCounts[positionIndex, alleleTypeIndex, directionIndex];
                    if (alleleTypeIndex == refBaseIndex)
                    {
                        refCandidate.SupportByDirection[directionIndex] = observed;
                    }

                    supportAtPosition += observed;
                }
            }

            if (intervals != null || supportAtPosition > 0)
            {
                candidates.Add(refCandidate);
            }
        }
    }

    return candidates;
}
/// <summary>
/// Records the allele observations contributed by one read: for each base mapped to the
/// reference it adds an allele count (N when the base quality is below
/// <c>_minBasecallQuality</c>) and a base-quality contribution. Reference positions skipped
/// between mapped bases, and the positions of a deletion that closes the CIGAR (optionally
/// followed by a soft clip), are recorded as Deletion. Optionally stores the read's coverage
/// summary.
/// </summary>
/// <param name="alignment">The read to tally.</param>
public void AddAlleleCounts(Read alignment)
{
    // Remember the length of the first read seen; later reads do not overwrite it.
    if (_readLength == null)
    {
        _readLength = alignment.ReadLength;
    }

    var cigar = alignment.CigarData;
    var closesWithDeletion = cigar.HasOperationAtOpIndex(0, 'D', true);
    var closesWithDeletionThenSoftclip = cigar.HasOperationAtOpIndex(1, 'D', true)
                                         && cigar.HasOperationAtOpIndex(0, 'S', true);

    var terminalDeletionLength = 0;
    var readIndexAfterDeletion = alignment.ReadLength;
    if (closesWithDeletionThenSoftclip)
    {
        // Deletion is the second-to-last op; the last op (soft clip) is excluded from the read length.
        terminalDeletionLength = (int)cigar[cigar.Count - 2].Length;
        readIndexAfterDeletion = (int)(alignment.ReadLength - cigar[cigar.Count - 1].Length);
    }
    else if (closesWithDeletion)
    {
        terminalDeletionLength = (int)cigar[cigar.Count - 1].Length;
    }

    var previousPosition = alignment.Position - 1;
    for (var readIndex = 0; readIndex < alignment.PositionMap.Length; readIndex++)
    {
        if (closesWithDeletionThenSoftclip && readIndex == readIndexAfterDeletion)
        {
            // Reached the trailing soft clip: emit the deletion that sits just before it.
            for (var offset = 1; offset <= terminalDeletionLength; offset++)
            {
                AddAlleleCount(previousPosition + offset, AlleleType.Deletion,
                    alignment.SequencedBaseDirectionMap[readIndex]);
            }
        }

        var position = alignment.PositionMap[readIndex];
        if (position == -1)
        {
            // This base has no reference coordinate.
            continue;
        }

        // Any reference positions jumped over since the previous mapped base are deletions.
        for (var skipped = previousPosition + 1; skipped < position; skipped++)
        {
            AddAlleleCount(skipped, AlleleType.Deletion, alignment.SequencedBaseDirectionMap[readIndex]);
        }

        var calledType = AlleleHelper.GetAlleleType(alignment.Sequence[readIndex]);
        var recordedType = alignment.Qualities[readIndex] < _minBasecallQuality
            ? AlleleType.N // low quality bases are recorded as no calls
            : calledType;

        AddAlleleCount(position, recordedType, alignment.SequencedBaseDirectionMap[readIndex]);

        // The base quality Q is passed on as 10^(-Q/10).
        AddAlleleBaseQuality(position, recordedType, alignment.SequencedBaseDirectionMap[readIndex],
            Math.Pow(10, -1 * (int)alignment.Qualities[readIndex] / 10f));

        previousPosition = position;
    }

    if (closesWithDeletion)
    {
        // The deletion is the very last op: attribute it to the direction of the final base.
        for (var offset = 1; offset <= terminalDeletionLength; offset++)
        {
            AddAlleleCount(previousPosition + offset, AlleleType.Deletion,
                alignment.SequencedBaseDirectionMap[alignment.SequencedBaseDirectionMap.Length - 1]);
        }
    }

    // add coverage summary
    if (_trackReadSummaries)
    {
        var summary = alignment.GetCoverageSummary();

        // store by end position so we can always be forward looking
        var owningBlock = GetBlock(summary.ClipAdjustedEndPosition);
        owningBlock.AddReadSummary(summary.ClipAdjustedEndPosition, summary);
    }
}