Esempio n. 1
0
        /// <summary>
        /// Walks the read's position map and records one allele count per mapped base, a deletion count
        /// for every reference position skipped between consecutive mapped bases, and deletion counts
        /// for a deletion that terminates the read (either as the last CIGAR operation or immediately
        /// before a trailing soft clip).
        /// </summary>
        /// <param name="alignment">Read whose sequence, qualities, direction map and CIGAR data are tallied.</param>
        private void AddAlleleCounts(Read alignment)
        {
            var previousRefPos = alignment.Position - 1;
            var cigar          = alignment.CigarData;

            // The read may finish with a deletion outright, or with a deletion followed by a soft clip.
            var trailingDeletion         = cigar.HasOperationAtOpIndex(1, 'D', true);
            var trailingDeletionThenClip = cigar.HasOperationAtOpIndex(2, 'D', true) && cigar.HasOperationAtOpIndex(1, 'S', true);

            var terminalDeletionLength = 0;
            var basesBeforeDeletion    = alignment.ReadLength;

            if (trailingDeletionThenClip)
            {
                // Deletion is the second-to-last operation; the last operation is the soft clip.
                terminalDeletionLength = (int)cigar[cigar.Count - 2].Length;
                basesBeforeDeletion    = (int)(alignment.ReadLength - cigar[cigar.Count - 1].Length);
            }
            else if (trailingDeletion)
            {
                terminalDeletionLength = (int)cigar[cigar.Count - 1].Length;
            }

            for (var readIndex = 0; readIndex < alignment.PositionMap.Length; readIndex++)
            {
                if (trailingDeletionThenClip && readIndex == basesBeforeDeletion)
                {
                    // Reached the first base after the deletion: emit the terminal deletion counts now.
                    for (var offset = 1; offset < terminalDeletionLength + 1; offset++)
                    {
                        AddAlleleCount(offset + previousRefPos, AlleleType.Deletion, alignment.DirectionMap[readIndex]);
                    }
                }

                var refPos = alignment.PositionMap[readIndex];
                if (refPos == -1)
                {
                    // Base is not mapped to the reference.
                    continue;
                }

                // Every reference position skipped since the previous mapped base is a deleted position.
                var skippedPos = previousRefPos + 1;
                while (skippedPos < refPos)
                {
                    AddAlleleCount(skippedPos, AlleleType.Deletion, alignment.DirectionMap[readIndex]);
                    skippedPos++;
                }

                var observedAllele = AlleleHelper.GetAlleleType(alignment.Sequence[readIndex]);
                if (alignment.Qualities[readIndex] < _minBasecallQuality)
                {
                    // Below the quality threshold: record the base as a no call.
                    observedAllele = AlleleType.N;
                }

                AddAlleleCount(refPos, observedAllele, alignment.DirectionMap[readIndex]);
                previousRefPos = refPos;
            }

            if (!trailingDeletion)
            {
                return;
            }

            // Read ends in a deletion: attribute the deleted positions to the direction of the last base.
            for (var offset = 1; offset < terminalDeletionLength + 1; offset++)
            {
                AddAlleleCount(offset + previousRefPos, AlleleType.Deletion, alignment.DirectionMap[alignment.DirectionMap.Length - 1]);
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Verifies the string-to-allele-type mapping: A, G, C, T and N map to themselves and "U" maps to N.
 /// </summary>
 public void GetAlleleType()
 {
     // Parallel tables: inputs[i] is expected to map to expectedTypes[i].
     string[]     inputs        = { "A", "G", "C", "T", "N", "U" };
     AlleleType[] expectedTypes = { AlleleType.A, AlleleType.G, AlleleType.C, AlleleType.T, AlleleType.N, AlleleType.N };

     for (var i = 0; i < inputs.Length; i++)
     {
         Assert.Equal(expectedTypes[i], AlleleHelper.GetAlleleType(inputs[i]));
     }
 }
        /// <summary>
        /// Computes coverage figures for an allele that occupies a single reference position by summing,
        /// per read direction, the counts of every coverage-contributing allele type at that position.
        /// </summary>
        /// <param name="allele">Called allele whose coverage, base quality sum, no-call count and support fields are updated in place.</param>
        /// <param name="alleleCountSource">Source queried for allele counts, base quality sums and gapped MNV reference counts.</param>
        protected virtual void CalculateSinglePoint(CalledAllele allele, IAlleleSource alleleCountSource)
        {
            //TODO: Is there a reason why we don't reallocate the stitched coverage here for point mutations? (as we do with spanning ones)

            var site = allele.ReferencePosition;

            // Sum up all observations at that point, one read direction at a time.
            for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
            {
                var readDirection = (DirectionType)directionIndex;

                foreach (var contributingType in Constants.CoverageContributingAlleles)
                {
                    allele.EstimatedCoverageByDirection[directionIndex] += alleleCountSource.GetAlleleCount(site, contributingType, readDirection);
                    allele.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(site, contributingType, readDirection);

                    // Observations of the reference base additionally count as reference support.
                    if (contributingType == AlleleHelper.GetAlleleType(allele.ReferenceAllele))
                    {
                        allele.ReferenceSupport += alleleCountSource.GetAlleleCount(site, contributingType, readDirection);
                    }
                }

                var coverageForDirection = allele.EstimatedCoverageByDirection[directionIndex];
                allele.TotalCoverage += coverageForDirection;

                // For single point variants, for now, we're calling everything confident coverage
                allele.ConfidentCoverageStart += coverageForDirection;
                allele.ConfidentCoverageEnd   += coverageForDirection;

                allele.NumNoCalls += alleleCountSource.GetAlleleCount(site, AlleleType.N, readDirection);
            }

            // adjust for reference counts already taken up by gapped mnvs

            // note: it's possible that the ref count taken up by a gapped mnv is greater than depth at that ref position.
            // this is possible when collapsing is true, and some gapped ref positions have low quality (or are N).
            // in these cases, they get collapsed to the mnv and count towards support, but those specific alleles were never added to region's allele counts because they are low quality.
            // collapsing is the correct thing to do, so this is ok.  we should just make sure to cap at 0.
            var gappedRefCounts = alleleCountSource.GetGappedMnvRefCount(site);

            switch (allele.Type)
            {
                case AlleleCategory.Snv:
                    allele.ReferenceSupport = Math.Max(0, allele.ReferenceSupport - gappedRefCounts);
                    break;

                case AlleleCategory.Reference:
                    allele.AlleleSupport = Math.Max(0, allele.AlleleSupport - gappedRefCounts);
                    break;
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Computes coverage for an allele that occupies a single coordinate by summing, per read
        /// direction, the counts of every coverage-contributing allele type at that coordinate.
        /// </summary>
        /// <param name="allele">Allele whose coverage, no-call count and support fields are updated in place.</param>
        /// <param name="alleleCountSource">State manager queried for allele counts and gapped MNV reference counts.</param>
        private static void CalculateSinglePoint(BaseCalledAllele allele, IStateManager alleleCountSource)
        {
            // Reference support is only tracked on variants; reference calls carry no such field here.
            var variant = allele as CalledVariant;

            // The reference base does not change while iterating, so resolve its allele type once.
            var referenceType = AlleleHelper.GetAlleleType(allele.Reference);

            for (var direction = 0; direction < Constants.NumDirectionTypes; direction++)
            {
                foreach (var alleleType in Constants.CoverageContributingAlleles)
                {
                    allele.TotalCoverageByDirection[direction] += alleleCountSource.GetAlleleCount(allele.Coordinate, alleleType, (DirectionType)direction);

                    if (alleleType != referenceType)
                    {
                        continue;
                    }
                    if (variant != null)
                    {
                        variant.ReferenceSupport += alleleCountSource.GetAlleleCount(variant.Coordinate, alleleType,
                                                                                     (DirectionType)direction);
                    }
                }

                allele.TotalCoverage += allele.TotalCoverageByDirection[direction];

                allele.NumNoCalls += alleleCountSource.GetAlleleCount(allele.Coordinate, AlleleType.N, (DirectionType)direction);
            }

            // adjust for reference counts already taken up by gapped mnvs
            var gappedRefCounts = alleleCountSource.GetGappedMnvRefCount(allele.Coordinate);

            // The ref count claimed by gapped mnvs can exceed the depth recorded at this position, so the
            // adjusted support is capped at 0 rather than being allowed to go negative.
            if (allele.Type == AlleleCategory.Snv && variant != null)
            {
                variant.ReferenceSupport = Math.Max(0, variant.ReferenceSupport - gappedRefCounts);
            }
            else if (allele.Type == AlleleCategory.Reference)
            {
                allele.AlleleSupport = Math.Max(0, allele.AlleleSupport - gappedRefCounts);
            }
        }
        /// <summary>
        /// Tallies a read into the count store: one allele count and base quality entry per mapped base
        /// (bases below the minimum quality are recorded as no calls), deletion counts for reference
        /// positions skipped inside the read or by a terminal deletion, collapsed-read and amplicon
        /// counts for called bases, and, when enabled, the read's coverage summary.
        /// </summary>
        /// <param name="alignment">Read to tally.</param>
        public void AddAlleleCounts(Read alignment)
        {
            // Remember the read length of the first read seen.
            if (_readLength == null)
            {
                _readLength = alignment.ReadLength;
            }

            var previousRefPos = alignment.Position - 1;
            var cigar          = alignment.CigarData;

            // The read may finish with a deletion outright, or with a deletion followed by a soft clip.
            var trailingDeletion         = cigar.HasOperationAtOpIndex(0, 'D', true);
            var trailingDeletionThenClip = cigar.HasOperationAtOpIndex(1, 'D', true) && cigar.HasOperationAtOpIndex(0, 'S', true);

            var terminalDeletionLength = 0;
            var basesBeforeDeletion    = alignment.ReadLength;

            if (trailingDeletionThenClip)
            {
                // Deletion is the second-to-last operation; the last operation is the soft clip.
                terminalDeletionLength = (int)cigar[cigar.Count - 2].Length;
                basesBeforeDeletion    = (int)(alignment.ReadLength - cigar[cigar.Count - 1].Length);
            }
            else if (trailingDeletion)
            {
                terminalDeletionLength = (int)cigar[cigar.Count - 1].Length;
            }

            var mapLength = alignment.PositionMap.Length;
            var readEnd   = alignment.EndPosition;
            var readStart = alignment.Position;

            for (var readIndex = 0; readIndex < mapLength; readIndex++)
            {
                DirectionType baseDirection = alignment.SequencedBaseDirectionMap[readIndex];

                // Deletion sitting just before a trailing soft clip: emit it when the first clipped base is reached.
                if (trailingDeletionThenClip
                    && readIndex == basesBeforeDeletion
                    && CandidateVariantFinder.CheckDeletionQuality(alignment, readIndex, _minBasecallQuality))
                {
                    for (var offset = 1; offset < terminalDeletionLength + 1; offset++)
                    {
                        var lastAnchorIndex = NumAnchorIndexes - 1;  // Last index
                        var deletedPos      = offset + previousRefPos;

                        AddAlleleCount(deletedPos, AlleleType.Deletion, baseDirection, lastAnchorIndex);
                        AddCollapsedReadCount(deletedPos, alignment, baseDirection);
                    }
                }

                var refPos = alignment.PositionMap.GetPositionAtIndex(readIndex);
                if (refPos == -1)
                {
                    // Base is not mapped to the reference.
                    continue;
                }

                var anchorType = GetAnchorType(readEnd, refPos, readStart);

                // If the deletion is of decent quality, add the skipped reference positions to the counts matrix.
                if (CandidateVariantFinder.CheckDeletionQuality(alignment, readIndex, _minBasecallQuality))
                {
                    for (var deletedPos = previousRefPos + 1; deletedPos < refPos; deletedPos++)
                    {
                        AddAlleleCount(deletedPos, AlleleType.Deletion, baseDirection, anchorType);
                        AddCollapsedReadCount(deletedPos, alignment, baseDirection);
                    }
                }

                var observedAllele = AlleleHelper.GetAlleleType(alignment.Sequence[readIndex]);
                if (alignment.Qualities[readIndex] < _minBasecallQuality)
                {
                    // Below the quality threshold: record the base as a no call.
                    observedAllele = AlleleType.N;
                }

                AddAlleleCount(refPos, observedAllele, baseDirection, anchorType);

                // Collapsed-read and amplicon bookkeeping only applies to bases that were actually called.
                if (observedAllele != AlleleType.N)
                {
                    AddCollapsedReadCount(refPos, alignment, baseDirection);
                    AddAmpliconCount(_trackAmpliconCounts, refPos, alignment.GetAmpliconNameIfExists());
                }

                // The quality score is stored as 10^(-Q/10).
                var qualityAsProbability = Math.Pow(10, -1 * (int)alignment.Qualities[readIndex] / 10f);
                AddAlleleBaseQuality(refPos, observedAllele, baseDirection, qualityAsProbability, anchorType);

                previousRefPos = refPos;
            }

            // Read ends in a deletion: attribute the deleted positions to the direction of the last base.
            if (trailingDeletion
                && CandidateVariantFinder.CheckDeletionQuality(alignment, alignment.SequencedBaseDirectionMap.Length - 1, _minBasecallQuality))
            {
                for (var offset = 1; offset < terminalDeletionLength + 1; offset++)
                {
                    DirectionType lastBaseDirection =
                        alignment.SequencedBaseDirectionMap[alignment.SequencedBaseDirectionMap.Length - 1];

                    var lastAnchorIndex = NumAnchorIndexes - 1;  // Last index
                    var deletedPos      = offset + previousRefPos;

                    AddAlleleCount(deletedPos, AlleleType.Deletion, lastBaseDirection, lastAnchorIndex); // ends in deletion -> within 1
                    AddCollapsedReadCount(deletedPos, alignment, lastBaseDirection);
                }
            }

            if (!_trackReadSummaries)
            {
                return;
            }

            // Add the coverage summary, stored by end position so we can always be forward looking.
            var summary        = alignment.GetCoverageSummary();
            var summaryEndPos  = summary.ClipAdjustedEndPosition;
            var owningBlock    = GetBlock(summaryEndPos);
            owningBlock.AddReadSummary(summaryEndPos, summary);
        }
        // TODO revisit using anchor info to make deletion coverage more accurate, when we have the time...
        //private void CalculateDeletionCoverage(CalledAllele variant, IAlleleSource alleleCountSource,
        //    int startPointPosition, int endPointPosition, bool presumeAnchoredForExactCov = true)
        //{
        //    var startPointCoverage = new[] { 0, 0, 0 };
        //    var endPointCoverage = new[] { 0, 0, 0 };
        //    var exactTotalCoverage = 0f;

        //    var variantLength = variant.Length;

        //    for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
        //    {
        //        var minAnchor = variantLength + 1;
        //        var startPointCoverageForDirectionTotal = 0;
        //        var endPointCoverageForDirectionTotal = 0;
        //        foreach (var alleleType in Constants.CoverageContributingAlleles)
        //        {
        //            var startPointCoverageForDirection = alleleCountSource.GetAlleleCount(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, symmetric: true);
        //            var endPointCoverageForDirection = alleleCountSource.GetAlleleCount(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, fromEnd: true, symmetric: true);

        //            startPointCoverageForDirectionTotal += startPointCoverageForDirection;
        //            endPointCoverageForDirectionTotal += endPointCoverageForDirection;

        //            startPointCoverage[directionIndex] += startPointCoverageForDirection;
        //            endPointCoverage[directionIndex] += endPointCoverageForDirection;

        //            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchor);
        //            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, fromEnd: true);
        //        }
        //    }

        //    // coverage by strand direction is used for strand bias.  need to redistribute stitched contribution to forward and reverse directions for book-ends before reconciling them.
        //    RedistributeStitchedCoverage(startPointCoverage);
        //    RedistributeStitchedCoverage(endPointCoverage);

        //    // intentionally leave stitched coverage empty when calculating for a spanned variant (it's already been redistributed)
        //    for (var directionIndex = 0; directionIndex < 2; directionIndex++)
        //    {
        //        var exactCoverageForDir = presumeAnchoredForExactCov ? ((startPointCoverage[directionIndex] + endPointCoverage[directionIndex])) / 2f : //will always round to lower.
        //            Math.Min(startPointCoverage[directionIndex], endPointCoverage[directionIndex]);
        //        variant.EstimatedCoverageByDirection[directionIndex] = (int)exactCoverageForDir;

        //        exactTotalCoverage += exactCoverageForDir;
        //    }

        //    //for extended variants, coverage is not an exact value.
        //    //Its an estimate based on the depth over the length of the variant.
        //    //In particular, the depth by direction does not always allocate neatly to an integer value.

        //    //ie, variant.TotalCoverage != variant.EstimatedCoverageByDirection[directionIndex].Sum

        //    variant.TotalCoverage = (int)exactTotalCoverage;
        //    variant.ReferenceSupport = Math.Max(0, variant.TotalCoverage - variant.AlleleSupport);

        //}

        /// <summary>
        /// Estimates coverage for a variant that spans more than one reference position by reading the
        /// allele counts at two datapoints and reconciling them per read direction.
        /// When anchor information is considered and the variant is an insertion, coverage from reads
        /// that are not anchored beyond the variant length is collected separately and added back with
        /// a weight derived from how often the variant is seen in unanchored versus well-anchored reads.
        /// Original design note: for insertions, take min of preceding and trailing datapoints; for
        /// deletions and mnvs, take average of first and last datapoint for variant.
        /// jg todo - figure out this old comment - (Or if we're at the edge of the world, give up and just take the coverage of the left base)
        /// </summary>
        /// <param name="variant">Variant whose coverage, base quality sum, reference support and confident/suspicious coverage fields are updated in place.</param>
        /// <param name="alleleCountSource">Source queried for allele counts and base quality sums.</param>
        /// <param name="startPointPosition">Reference position of the first datapoint.</param>
        /// <param name="endPointPosition">Reference position of the second datapoint; queried with <c>fromEnd: true</c>.</param>
        /// <param name="presumeAnchoredForExactCov">When true the two datapoints are averaged per direction; when false their minimum is taken.</param>
        protected virtual void CalculateSpanning(CalledAllele variant, IAlleleSource alleleCountSource, int startPointPosition, int endPointPosition, bool presumeAnchoredForExactCov = true)
        {
            // TODO come back to this - now that we are tracking coverage more tightly we may be able to improve deletion spanning read count estimates as well
            //if (variant.Type == AlleleCategory.Deletion)
            //{
            //    CalculateDeletionCoverage(variant, alleleCountSource, startPointPosition, endPointPosition,
            //        presumeAnchoredForExactCov);
            //    return;
            //}

            //empty arrays to do our coverage calculations.  the three spaces are for each read direction.
            var startPointCoverage = new[] { 0, 0, 0 };
            var endPointCoverage   = new[] { 0, 0, 0 };
            var exactTotalCoverage = 0f;

            var confidentCoverageLeft   = 0;
            var confidentCoverageRight  = 0;
            var suspiciousCoverageLeft  = 0;
            var suspiciousCoverageRight = 0;

            var firstBase           = AlleleType.N;
            var lastBase            = AlleleType.N;
            var bePickyAboutAnchors = _considerAnchorInformation && variant.Type == AlleleCategory.Insertion;

            if (bePickyAboutAnchors)
            {
                // Index 1 skips the leading base of the alternate allele.
                var firstBaseChar = variant.AlternateAllele[1];
                firstBase = AlleleHelper.GetAlleleType(firstBaseChar);

                var lastBaseChar = variant.AlternateAllele[variant.AlternateAllele.Length - 1];
                lastBase = AlleleHelper.GetAlleleType(lastBaseChar);
            }

            var startPointCoverageUnanchored   = new[] { 0, 0, 0 };
            var endPointCoverageUnanchored     = new[] { 0, 0, 0 };
            var unanchoredCoverageStartQuality = 0D;
            var unanchoredCoverageEndQuality   = 0D;

            var unanchoredSupport = variant.AlleleSupport - variant.WellAnchoredSupport;

            // Track the relative coverages of each and then go back and use this to determine the weighting factor

            for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
            {
                foreach (var alleleType in Constants.CoverageContributingAlleles)
                {
                    // An anchor requirement is only imposed on allele types matching the first/last base picked above.
                    var anchoredCoverageOnlyEnd   = bePickyAboutAnchors && alleleType == firstBase;
                    var anchoredCoverageOnlyStart = bePickyAboutAnchors && alleleType == lastBase;

                    var minAnchorEnd   = anchoredCoverageOnlyEnd ? variant.Length : 0;
                    var minAnchorStart = anchoredCoverageOnlyStart ? variant.Length : 0;

                    var startPointCoverageForDirection = alleleCountSource.GetAlleleCount(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchorStart);
                    startPointCoverage[directionIndex] += startPointCoverageForDirection;
                    var endPointCoverageForDirection = alleleCountSource.GetAlleleCount(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchorEnd, fromEnd: true);
                    endPointCoverage[directionIndex] += endPointCoverageForDirection;

                    confidentCoverageLeft  += startPointCoverageForDirection;
                    confidentCoverageRight += endPointCoverageForDirection;

                    variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchorStart);
                    variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchorEnd, fromEnd: true);

                    if (bePickyAboutAnchors && unanchoredSupport > 0) // Shortcut - if the unanchored support is 0 anyway, we're going to use 0 as our weight here and there's no point collecting this info
                    {
                        if (minAnchorStart > 0)
                        {
                            var unanchoredCoverageStartCount = alleleCountSource.GetAlleleCount(startPointPosition,
                                                                                                alleleType, (DirectionType)directionIndex, 0, maxAnchor: minAnchorStart - 1);
                            startPointCoverageUnanchored[directionIndex] += unanchoredCoverageStartCount;

                            suspiciousCoverageLeft += unanchoredCoverageStartCount;

                            // Need to adjust the windowed base qualities as well
                            unanchoredCoverageStartQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, 0, maxAnchor: minAnchorStart - 1);
                        }

                        if (minAnchorEnd > 0)
                        {
                            var unanchoredCoverageEndCount = alleleCountSource.GetAlleleCount(endPointPosition,
                                                                                              alleleType, (DirectionType)directionIndex, 0, fromEnd: true,
                                                                                              maxAnchor: minAnchorEnd - 1);
                            endPointCoverageUnanchored[directionIndex] += unanchoredCoverageEndCount;

                            suspiciousCoverageRight += unanchoredCoverageEndCount;

                            // Need to adjust the windowed base qualities as well.
                            // The end-side quality is read at the end datapoint, matching the end-side count above.
                            unanchoredCoverageEndQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, (DirectionType)directionIndex, 0, fromEnd: true,
                                                                                                          maxAnchor: minAnchorEnd - 1);
                        }
                    }
                }
            }

            if (bePickyAboutAnchors)
            {
                var trulyAnchoredCoverage = (((confidentCoverageLeft - suspiciousCoverageRight) +
                                              (confidentCoverageRight - suspiciousCoverageLeft)) / 2f);

                var anchoredVariantFreq =
                    trulyAnchoredCoverage <= 0 ? 0 : variant.WellAnchoredSupport / trulyAnchoredCoverage;

                var totalSuspiciousCoverage =
                    suspiciousCoverageLeft +
                    suspiciousCoverageRight; // Suspicious coverages are not likely to be from the same sources, so add rather than average
                var unanchoredVariantFreq = totalSuspiciousCoverage == 0
                    ? 0
                    : unanchoredSupport / ((float)totalSuspiciousCoverage);

                // Weight is clamped to [0, 1]; with no anchored evidence at all, unanchored coverage counts in full.
                var variantSpecificUnanchoredWeight = Math.Max(0, anchoredVariantFreq == 0
                    ? 1
                    : Math.Min(1, unanchoredVariantFreq / anchoredVariantFreq));
                variant.UnanchoredCoverageWeight = variantSpecificUnanchoredWeight;

                for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                {
                    startPointCoverage[directionIndex] +=
                        (int)(startPointCoverageUnanchored[directionIndex] * variantSpecificUnanchoredWeight);
                    endPointCoverage[directionIndex] +=
                        (int)(endPointCoverageUnanchored[directionIndex] * variantSpecificUnanchoredWeight);
                }

                // GB: this will keep us consistent with how we were doing it before, but I find it rather odd that we're ADDING base quality from both sides and ultimately in ProcessVariant dividing that sum by the total coverage which is an average, not a sum, of each side's coverage.
                // Since we are dividing the total q score by the tot cov i didn't want it to get inflated by reducing the tot cov, so adjusted by the same factor
                // TJD response: Base quality is a log of a p value, so averaging them is not the same as summing them then dividing. If you have a bunch of Qscores, say 10 and 10 and 100, you DO NOT do (10+ 10+100)/3. You have to do Q10-> p 0.1 and 100 -> p 0.01 so avg(0.1,0.1,0.01) is ~ .2/3 = 0.0666 -> a Q of (what ever that ends up being)... just a computational trick, to do it in log space instead of normal space
                //
                // The unanchored quality sums were already accumulated over every direction above, so they are
                // added exactly once here rather than once per direction.
                variant.SumOfBaseQuality += unanchoredCoverageStartQuality * variantSpecificUnanchoredWeight;
                variant.SumOfBaseQuality += unanchoredCoverageEndQuality * variantSpecificUnanchoredWeight;
            }

            // coverage by strand direction is used for strand bias.  need to redistribute stitched contribution to forward and reverse directions for book-ends before reconciling them.
            RedistributeStitchedCoverage(startPointCoverage);
            RedistributeStitchedCoverage(endPointCoverage);

            // intentionally leave stitched coverage empty when calculating for a spanned variant (it's already been redistributed)
            for (var directionIndex = 0; directionIndex < 2; directionIndex++)
            {
                var exactCoverageForDir = presumeAnchoredForExactCov ? ((startPointCoverage[directionIndex] + endPointCoverage[directionIndex])) / 2f :   //will always round to lower.
                                          Math.Min(startPointCoverage[directionIndex], endPointCoverage[directionIndex]);
                variant.EstimatedCoverageByDirection[directionIndex] = (int)exactCoverageForDir;

                exactTotalCoverage += exactCoverageForDir;
            }

            //for extended variants, coverage is not an exact value.
            //Its an estimate based on the depth over the length of the variant.
            //In particular, the depth by direction does not always allocate neatly to an integer value.

            //ie, variant.TotalCoverage != variant.EstimatedCoverageByDirection[directionIndex].Sum

            variant.TotalCoverage           = (int)exactTotalCoverage;
            variant.ReferenceSupport        = Math.Max(0, variant.TotalCoverage - variant.AlleleSupport);
            variant.SuspiciousCoverageStart = suspiciousCoverageLeft;
            variant.ConfidentCoverageStart  = confidentCoverageLeft;
            variant.SuspiciousCoverageEnd   = suspiciousCoverageRight;
            variant.ConfidentCoverageEnd    = confidentCoverageRight;
        }
Esempio n. 7
0
        /// <summary>
        /// Returns every candidate variant stored for this block and, when reference alleles are
        /// requested or forced alleles are supplied, a reference candidate for each position of the
        /// regions to fetch, carrying its per-direction support summed over all anchor indexes.
        /// </summary>
        /// <param name="includeRefAlleles">When true, reference candidates are produced over <paramref name="intervals"/> (or the whole block if none are given).</param>
        /// <param name="chrReference">Reference sequence and chromosome name used to build reference candidates.</param>
        /// <param name="intervals">Optional intervals limiting where reference candidates are produced; clipped to this block.</param>
        /// <param name="forcesGtAlleles">Optional forced alleles; when reference alleles are not requested, intervals are derived from these instead.</param>
        /// <returns>The stored candidates followed by any reference candidates that were added.</returns>
        public List <CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference,
                                                       ChrIntervalSet intervals = null, HashSet <Tuple <string, int, string, string> > forcesGtAlleles = null)
        {
            var candidates = new List <CandidateAllele>();

            // add all candidates - these are potentially collapsable targets
            foreach (var candidatesAtPosition in _candidateVariantsLookup)
            {
                if (candidatesAtPosition == null)
                {
                    continue;
                }

                candidates.AddRange(candidatesAtPosition);
            }

            var activeIntervals = includeRefAlleles ? intervals : CreateIntervalsFromAllels(chrReference, forcesGtAlleles);

            // Nothing more to do unless reference alleles were requested or forced alleles were supplied.
            if (!includeRefAlleles && (forcesGtAlleles == null || forcesGtAlleles.Count == 0))
            {
                return candidates;
            }

            var regionsToFetch = activeIntervals == null
                ? new List <Region> { this }            // fetch whole block region
                : activeIntervals.GetClipped(this);     // clip intervals to block region

            foreach (var region in regionsToFetch)
            {
                for (var position = region.StartPosition; position <= region.EndPosition; position++)
                {
                    // Stop once the region runs past the end of the reference sequence.
                    if (position > chrReference.Sequence.Length)
                    {
                        break;
                    }

                    // add ref alleles within region to fetch - note that zero coverage ref positions are only added if input intervals provided
                    var refBase      = chrReference.Sequence[position - 1].ToString();
                    var refTypeIndex = (int)AlleleHelper.GetAlleleType(refBase);
                    var refCandidate = new CandidateAllele(chrReference.Name, position,
                                                           refBase, refBase, AlleleCategory.Reference);

                    var offsetInBlock = position - StartPosition;

                    // gather support for allele
                    var observedTotal = 0;

                    for (var alleleTypeIndex = 0; alleleTypeIndex < Constants.NumAlleleTypes; alleleTypeIndex++)
                    {
                        for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                        {
                            // Collapse the anchor dimension into a single count for this allele type and direction.
                            var count = 0;
                            for (int anchorIndex = 0; anchorIndex < NumAnchorIndexes; anchorIndex++)
                            {
                                count += _alleleCounts[offsetInBlock, alleleTypeIndex, directionIndex, anchorIndex];
                            }

                            if (alleleTypeIndex == refTypeIndex)
                            {
                                refCandidate.SupportByDirection[directionIndex] = count;

                                // TODO this isn't really proven to be well-anchored, nor is it proven not to be
                                //refAllele.WellAnchoredSupportByDirection[directionIndex] = count;
                            }

                            observedTotal += count;
                        }
                    }

                    if (activeIntervals != null || observedTotal > 0)
                    {
                        candidates.Add(refCandidate);
                    }
                }
            }

            return candidates;
        }
Esempio n. 8
0
        /// <summary>
        /// Walks the bases of a single alignment operation, comparing each read base with the reference base at the
        /// corresponding offset, and accumulates runs of mismatches (optionally bridged by matching bases, as decided
        /// by ShouldBuildUpMNV) that are handed to FlushVariant together with the output list.
        /// </summary>
        /// <param name="alignment">Read whose Sequence and Qualities are indexed from <paramref name="opStartIndexInRead"/>.</param>
        /// <param name="refChromosome">Reference sequence, indexed from <paramref name="opStartIndexInReference"/>.</param>
        /// <param name="opStartIndexInRead">Index in the read of the first base of the operation.</param>
        /// <param name="operationLength">Number of bases in the operation.</param>
        /// <param name="opStartIndexInReference">Index in the reference of the first base of the operation.</param>
        /// <param name="chromosomeName">Chromosome name passed through to FlushVariant.</param>
        /// <returns>The list that FlushVariant was given to populate for this operation.</returns>
        private IEnumerable <CandidateAllele> ExtractSnvsFromOperation(Read alignment, string refChromosome, int opStartIndexInRead, uint operationLength, int opStartIndexInReference, string chromosomeName)
        {
            var candidateSingleNucleotideAlleles = new List <CandidateAllele>();
            var variantLengthSoFar        = 0;
            var interveningRefLengthSoFar = 0;

            // Number of operation bases actually examined. Equals operationLength unless the scan stops early
            // because the operation extends past the end of the reference.
            var processedLength = (int)operationLength;

            for (var i = 0; i < operationLength; i++)
            {
                var qualityGoodEnough = alignment.Qualities[opStartIndexInRead + i] >= _minimumBaseCallQuality;

                var readBase = alignment.Sequence[opStartIndexInRead + i];
                if (opStartIndexInReference + i >= refChromosome.Length)
                {
                    // Ran off the end of the reference: remember where we stopped so the final flush
                    // is anchored at the pending run's real start rather than at the nominal operation end.
                    processedLength = i;
                    break;
                }
                var refBase = refChromosome[opStartIndexInReference + i];

                var atEndOfOperation            = i == (operationLength - 1);
                var startingMnvAtEndOfOperation = (atEndOfOperation && variantLengthSoFar == 0);

                //Do not create/extend a variant if the quality isn't good enough or the allele or ref is an N
                if ((AlleleHelper.GetAlleleType(readBase) == AlleleType.N) || (AlleleHelper.GetAlleleType(refBase) == AlleleType.N) || !qualityGoodEnough)
                {
                    FlushVariant(alignment, refChromosome, opStartIndexInRead + i - variantLengthSoFar,
                                 opStartIndexInReference + i - variantLengthSoFar, chromosomeName, variantLengthSoFar,
                                 interveningRefLengthSoFar, candidateSingleNucleotideAlleles);
                    variantLengthSoFar        = 0;
                    interveningRefLengthSoFar = 0;
                }
                else if (BasesMatch(refBase, readBase))
                {
                    if (ShouldBuildUpMNV(variantLengthSoFar, interveningRefLengthSoFar, true) &&
                        !startingMnvAtEndOfOperation) //Don't build up an MNV if we're on the last base of operation
                    {
                        // A matching base inside a pending run: extend the run and count the bridging reference base.
                        variantLengthSoFar++;
                        interveningRefLengthSoFar++;
                    }
                    else
                    {
                        FlushVariant(alignment, refChromosome, opStartIndexInRead + i - variantLengthSoFar,
                                     opStartIndexInReference + i - variantLengthSoFar, chromosomeName, variantLengthSoFar,
                                     interveningRefLengthSoFar, candidateSingleNucleotideAlleles);
                        variantLengthSoFar        = 0;
                        interveningRefLengthSoFar = 0;
                    }
                }
                else
                {
                    if (ShouldBuildUpMNV(variantLengthSoFar, interveningRefLengthSoFar, false) &&
                        !startingMnvAtEndOfOperation) //Don't build up an MNV if we're on the last base of operation
                    {
                        // A mismatching base extends the run and resets the count of bridging reference bases.
                        variantLengthSoFar++;
                        interveningRefLengthSoFar = 0;
                    }
                    else
                    {
                        FlushVariant(alignment, refChromosome, opStartIndexInRead + i - variantLengthSoFar,
                                     opStartIndexInReference + i - variantLengthSoFar,
                                     chromosomeName, variantLengthSoFar, interveningRefLengthSoFar,
                                     candidateSingleNucleotideAlleles);
                        // The current mismatching base starts a new run of length one.
                        variantLengthSoFar        = 1;
                        interveningRefLengthSoFar = 0;
                    }
                }
            }

            //Flush if we've gotten to the end (or stopped early at the end of the reference)
            FlushVariant(alignment, refChromosome, opStartIndexInRead + processedLength - variantLengthSoFar,
                         opStartIndexInReference + processedLength - variantLengthSoFar, chromosomeName, variantLengthSoFar,
                         interveningRefLengthSoFar, candidateSingleNucleotideAlleles);

            return(candidateSingleNucleotideAlleles);
        }
Esempio n. 9
0
        /// <summary>
        /// Returns every candidate allele stored in this block's lookup and, when requested, a reference allele
        /// for each position of the block (or of the supplied intervals clipped to the block).
        /// </summary>
        /// <param name="includeRefAlleles">When false, only the stored candidates are returned.</param>
        /// <param name="chrReference">Provides the chromosome name and the reference sequence for building reference alleles.</param>
        /// <param name="intervals">Optional intervals. When supplied they are clipped to this block, and reference
        /// alleles are added even for positions whose recorded counts sum to zero.</param>
        /// <returns>A new list holding the stored candidates followed by any reference alleles.</returns>
        public List <CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference,
                                                       ChrIntervalSet intervals = null)
        {
            var result = new List <CandidateAllele>();

            // Stored candidates first - these are potentially collapsable targets.
            foreach (var candidatesAtPosition in _candidateVariantsLookup)
            {
                if (candidatesAtPosition == null)
                {
                    continue;
                }

                result.AddRange(candidatesAtPosition);
            }

            if (!includeRefAlleles)
            {
                return result;
            }

            var hasIntervals = intervals != null;

            // With intervals: clip them to this block. Without: use the whole block region.
            var regions = hasIntervals
                ? intervals.GetClipped(this)
                : new List <Region> { this };

            foreach (var region in regions)
            {
                for (var position = region.StartPosition; position <= region.EndPosition; position++)
                {
                    // Positions beyond the end of the reference sequence cannot yield a reference base.
                    if (position > chrReference.Sequence.Length)
                    {
                        break;
                    }

                    var offsetInBlock = position - StartPosition;
                    var refBase       = chrReference.Sequence[position - 1].ToString();
                    var refTypeIndex  = (int)AlleleHelper.GetAlleleType(refBase);

                    var refAllele = new CandidateAllele(chrReference.Name, position,
                                                        refBase, refBase, AlleleCategory.Reference);

                    // Sum the counts over every allele type and direction; the counts for the reference base's
                    // own allele type are also copied into the allele's per-direction support.
                    var observedCount = 0;

                    for (var typeIndex = 0; typeIndex < Constants.NumAlleleTypes; typeIndex++)
                    {
                        var isRefType = typeIndex == refTypeIndex;

                        for (var dirIndex = 0; dirIndex < Constants.NumDirectionTypes; dirIndex++)
                        {
                            var count = _alleleCounts[offsetInBlock, typeIndex, dirIndex];

                            if (isRefType)
                            {
                                refAllele.SupportByDirection[dirIndex] = count;
                            }

                            observedCount += count;
                        }
                    }

                    // Zero-count reference positions are only reported when input intervals were provided.
                    if (hasIntervals || observedCount > 0)
                    {
                        result.Add(refAllele);
                    }
                }
            }

            return result;
        }
Esempio n. 10
0
        /// <summary>
        /// Records allele counts and base-quality contributions for every reference-mapped base of the alignment,
        /// adds deletion counts for reference positions skipped between mapped bases and for a deletion at the end
        /// of the CIGAR (optionally followed by a soft clip), and, when read summaries are tracked, stores the
        /// read's coverage summary in the block of its clip-adjusted end position.
        /// </summary>
        /// <param name="alignment">The read to account for.</param>
        public void AddAlleleCounts(Read alignment)
        {
            // The first read seen fixes the remembered read length.
            if (!_readLength.HasValue)
            {
                _readLength = alignment.ReadLength;
            }

            var cigar            = alignment.CigarData;
            var previousPosition = alignment.Position - 1;

            var trailingDeletion             = cigar.HasOperationAtOpIndex(0, 'D', true);
            var trailingDeletionThenSoftclip = cigar.HasOperationAtOpIndex(1, 'D', true) && cigar.HasOperationAtOpIndex(0, 'S', true);

            var terminalDeletionLength = 0;
            var readIndexOfSoftclip    = alignment.ReadLength;

            if (trailingDeletionThenSoftclip)
            {
                // The deletion is the second-to-last operation; the final operation's length is subtracted
                // from the read length to find the read index at which the terminal deletion is recorded.
                terminalDeletionLength = (int)(cigar[cigar.Count - 2].Length);
                readIndexOfSoftclip    = (int)(alignment.ReadLength - cigar[cigar.Count - 1].Length);
            }
            else if (trailingDeletion)
            {
                terminalDeletionLength = (int)(cigar[cigar.Count - 1].Length);
            }

            for (var readIndex = 0; readIndex < alignment.PositionMap.Length; readIndex++)
            {
                if (trailingDeletionThenSoftclip && readIndex == readIndexOfSoftclip)
                {
                    // add any terminal deletion counts
                    for (var offset = 1; offset <= terminalDeletionLength; offset++)
                    {
                        AddAlleleCount(previousPosition + offset, AlleleType.Deletion, alignment.SequencedBaseDirectionMap[readIndex]);
                    }
                }

                var mappedPosition = alignment.PositionMap[readIndex];

                if (mappedPosition == -1)
                {
                    continue; // not mapped to reference
                }

                var direction = alignment.SequencedBaseDirectionMap[readIndex];

                // Reference positions skipped since the previously mapped base count as deletions.
                for (var skipped = previousPosition + 1; skipped < mappedPosition; skipped++)
                {
                    AddAlleleCount(skipped, AlleleType.Deletion, direction);
                }

                var observedType = AlleleHelper.GetAlleleType(alignment.Sequence[readIndex]);
                var quality      = alignment.Qualities[readIndex];

                if (quality < _minBasecallQuality)
                {
                    observedType = AlleleType.N; // record this event as a no call
                }

                AddAlleleCount(mappedPosition, observedType, direction);
                AddAlleleBaseQuality(mappedPosition, observedType, direction, Math.Pow(10, -1 * (int)quality / 10f));

                previousPosition = mappedPosition;
            }

            if (trailingDeletion)
            {
                // add any terminal deletion counts, using the direction of the last entry in the direction map
                for (var offset = 1; offset <= terminalDeletionLength; offset++)
                {
                    AddAlleleCount(previousPosition + offset, AlleleType.Deletion, alignment.SequencedBaseDirectionMap[alignment.SequencedBaseDirectionMap.Length - 1]);
                }
            }

            // add coverage summary
            if (_trackReadSummaries)
            {
                var summary     = alignment.GetCoverageSummary();
                var targetBlock = GetBlock(summary.ClipAdjustedEndPosition);
                // store by end position so we can always be forward looking
                targetBlock.AddReadSummary(summary.ClipAdjustedEndPosition, summary);
            }
        }