Esempio n. 1
0
        /// <summary>
        /// Selects the target that <paramref name="toCollapse"/> should be collapsed into, or null when none qualifies.
        /// </summary>
        /// <param name="toCollapse">The candidate being collapsed.</param>
        /// <param name="targets">Candidates that may absorb <paramref name="toCollapse"/>.</param>
        /// <param name="source">Allele source handed to the coverage calculator.</param>
        /// <returns>
        /// A fully anchored target that equals <paramref name="toCollapse"/> if one exists; otherwise the first target
        /// (in this instance's comparer order) passing both frequency thresholds; otherwise null.
        /// </returns>
        private CandidateAllele GetMatches(CandidateAllele toCollapse, IEnumerable <CandidateAllele> targets, IAlleleSource source)
        {
            // Keep only the targets that can absorb the candidate, excluding the candidate itself.
            var collapsibleTargets = new List<CandidateAllele>();

            foreach (var target in targets)
            {
                if (CanCollapse(toCollapse, target) && target != toCollapse)
                {
                    collapsibleTargets.Add(target);
                }
            }

            if (collapsibleTargets.Count == 0)
            {
                return null;
            }

            // Frequencies may be stale from an earlier fetch, so recompute and write them back.
            foreach (var target in collapsibleTargets)
            {
                var callableTarget = AlleleHelper.Map(target);
                _coverageCalculator.Compute(callableTarget, source);
                target.Frequency = callableTarget.Frequency;
            }

            // The candidate's own frequency is computed on a mapped copy; it is only used for the ratio test below.
            var callableToCollapse = AlleleHelper.Map(toCollapse);
            _coverageCalculator.Compute(callableToCollapse, source);

            // This instance acts as the comparer that ranks the targets.
            collapsibleTargets.Sort(this);

            // First preference: an exact match that is anchored on both sides.
            foreach (var target in collapsibleTargets)
            {
                if (target.Equals(toCollapse) && !target.OpenOnLeft && !target.OpenOnRight)
                {
                    return target;
                }
            }

            // Otherwise: the best-ranked target that clears both the absolute and the relative frequency thresholds.
            foreach (var target in collapsibleTargets)
            {
                if (target.Frequency >= _freqThreshold && target.Frequency / callableToCollapse.Frequency > _freqRatioThreshold)
                {
                    return target;
                }
            }

            return null;
        }
Esempio n. 2
0
 /// <summary>
 /// Checks that each single-base string maps to its allele type, and that the unrecognised base "U" maps to N.
 /// </summary>
 public void GetAlleleType()
 {
     // Parallel tables: inputs[i] is expected to map to expectedTypes[i].
     var inputs        = new[] { "A", "G", "C", "T", "N", "U" };
     var expectedTypes = new[] { AlleleType.A, AlleleType.G, AlleleType.C, AlleleType.T, AlleleType.N, AlleleType.N };

     for (var index = 0; index < inputs.Length; index++)
     {
         Assert.Equal(expectedTypes[index], AlleleHelper.GetAlleleType(inputs[index]));
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Walks the read base by base and records one allele count per covered reference position:
        /// the called base (or N when its quality is below <c>_minBasecallQuality</c>), a Deletion for every
        /// reference position skipped between two mapped bases, and a Deletion for each base of a deletion
        /// that terminates the read (optionally followed by a soft clip).
        /// </summary>
        /// <param name="alignment">The read whose bases are tallied.</param>
        private void AddAlleleCounts(Read alignment)
        {
            var cigar            = alignment.CigarData;
            var previousPosition = alignment.Position - 1;

            // NOTE(review): the trailing 'true' presumably makes the op index count from the end of the CIGAR - confirm.
            var trailingDeletion             = cigar.HasOperationAtOpIndex(1, 'D', true);
            var trailingDeletionThenSoftclip = cigar.HasOperationAtOpIndex(2, 'D', true) && cigar.HasOperationAtOpIndex(1, 'S', true);

            var terminalDeletionLength = 0;
            var basesBeforeDeletion    = alignment.ReadLength;

            if (trailingDeletionThenSoftclip)
            {
                // Deletion is the second-to-last op; the bases of the final op are excluded from the pre-deletion length.
                terminalDeletionLength = (int)cigar[cigar.Count - 2].Length;
                basesBeforeDeletion    = (int)(alignment.ReadLength - cigar[cigar.Count - 1].Length);
            }
            else if (trailingDeletion)
            {
                // Deletion is the very last op; the whole read precedes it.
                terminalDeletionLength = (int)cigar[cigar.Count - 1].Length;
            }

            for (var readIndex = 0; readIndex < alignment.PositionMap.Length; readIndex++)
            {
                // Reached the first base after the deletion that sits in front of the trailing soft clip.
                if (trailingDeletionThenSoftclip && readIndex == basesBeforeDeletion)
                {
                    for (var offset = 1; offset < terminalDeletionLength + 1; offset++)
                    {
                        AddAlleleCount(offset + previousPosition, AlleleType.Deletion, alignment.DirectionMap[readIndex]);
                    }
                }

                var referencePosition = alignment.PositionMap[readIndex];

                if (referencePosition == -1)
                {
                    continue; // base is not mapped to the reference
                }

                // Any reference positions skipped since the previous mapped base are deleted in this read.
                for (var skipped = previousPosition + 1; skipped < referencePosition; skipped++)
                {
                    AddAlleleCount(skipped, AlleleType.Deletion, alignment.DirectionMap[readIndex]);
                }

                var calledType = AlleleHelper.GetAlleleType(alignment.Sequence[readIndex]);
                var lowQuality = alignment.Qualities[readIndex] < _minBasecallQuality;

                // Low-quality bases are recorded as no-calls.
                AddAlleleCount(referencePosition, lowQuality ? AlleleType.N : calledType, alignment.DirectionMap[readIndex]);

                previousPosition = referencePosition;
            }

            if (trailingDeletion)
            {
                // Deletion at the very end of the read: counted with the direction of the last base.
                for (var offset = 1; offset < terminalDeletionLength + 1; offset++)
                {
                    AddAlleleCount(offset + previousPosition, AlleleType.Deletion, alignment.DirectionMap[alignment.DirectionMap.Length - 1]);
                }
            }
        }
        /// <summary>
        /// Refreshes the indel's frequency from the coverage calculator and decides whether it may be called.
        /// </summary>
        /// <param name="indel">Candidate indel; its <c>Frequency</c> is overwritten with the freshly computed value.</param>
        /// <param name="alleleSource">Allele source handed to the coverage calculator.</param>
        /// <returns>True when the indel is known, or when its frequency reaches <c>_frequencyCutoff</c>.</returns>
        private bool IsCallable(CandidateIndel indel, IAlleleSource alleleSource)
        {
            var mappedIndel = AlleleHelper.Map(indel);
            _coverageCalculator.Compute(mappedIndel, alleleSource);

            var computedFrequency = mappedIndel.Frequency;
            indel.Frequency = computedFrequency;

            if (indel.IsKnown)
            {
                return true; // known indels bypass the frequency cutoff
            }

            return computedFrequency >= _frequencyCutoff;
        }
        /// <summary>
        /// Accumulates coverage statistics for a single-position allele from the counts held by
        /// <paramref name="alleleCountSource"/>: per-direction and total coverage, confident coverage,
        /// sum of base qualities, no-call count and reference support.
        /// </summary>
        /// <param name="allele">Allele to update in place; values are added to whatever it already holds.</param>
        /// <param name="alleleCountSource">Source of per-position allele counts and base-quality sums.</param>
        protected virtual void CalculateSinglePoint(CalledAllele allele, IAlleleSource alleleCountSource)
        {
            //TODO: Is there a reason why we don't reallocate the stitched coverage here for point mutations? (as we do with spanning ones)
            // sum up all observations at that point

            // The reference allele type does not change while iterating, so resolve it once.
            var referenceAlleleType = AlleleHelper.GetAlleleType(allele.ReferenceAllele);

            for (var direction = 0; direction < Constants.NumDirectionTypes; direction++)
            {
                var directionType = (DirectionType)direction;

                foreach (var alleleType in Constants.CoverageContributingAlleles)
                {
                    var alleleCount = alleleCountSource.GetAlleleCount(allele.ReferencePosition, alleleType, directionType);

                    allele.EstimatedCoverageByDirection[direction] += alleleCount;
                    allele.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(allele.ReferencePosition, alleleType, directionType);

                    // Observations of the reference base also count as reference support.
                    if (alleleType == referenceAlleleType)
                    {
                        allele.ReferenceSupport += alleleCount;
                    }
                }

                allele.TotalCoverage += allele.EstimatedCoverageByDirection[direction];

                // For single point variants, for now, we're calling everything confident coverage
                allele.ConfidentCoverageStart += allele.EstimatedCoverageByDirection[direction];
                allele.ConfidentCoverageEnd   += allele.EstimatedCoverageByDirection[direction];

                allele.NumNoCalls += alleleCountSource.GetAlleleCount(allele.ReferencePosition, AlleleType.N, directionType);
            }

            // adjust for reference counts already taken up by gapped mnvs

            // note: it's possible that the ref count taken up by a gapped mnv is greater than depth at that ref position.
            // this is possible when collapsing is true, and some gapped ref positions have low quality (or are N).
            // in these cases, they get collapsed to the mnv and count towards support, but those specific alleles were never added to region's allele counts because they are low quality.
            // collapsing is the correct thing to do, so this is ok.  we should just make sure to cap at 0.
            var gappedRefCounts = alleleCountSource.GetGappedMnvRefCount(allele.ReferencePosition);

            if (allele.Type == AlleleCategory.Snv)
            {
                allele.ReferenceSupport = Math.Max(0, allele.ReferenceSupport - gappedRefCounts);
            }
            else if (allele.Type == AlleleCategory.Reference)
            {
                allele.AlleleSupport = Math.Max(0, allele.AlleleSupport - gappedRefCounts);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Accumulates coverage statistics for a single-position allele from the counts held by
        /// <paramref name="alleleCountSource"/>: per-direction and total coverage, no-call count and,
        /// when the allele is a <c>CalledVariant</c>, reference support.
        /// </summary>
        /// <param name="allele">Allele to update in place; values are added to whatever it already holds.</param>
        /// <param name="alleleCountSource">Source of per-position allele counts.</param>
        private static void CalculateSinglePoint(BaseCalledAllele allele, IStateManager alleleCountSource)
        {
            // Reference support is only tracked on called variants; null for any other allele type.
            var variant = allele as CalledVariant;

            for (var direction = 0; direction < Constants.NumDirectionTypes; direction++)
            {
                foreach (var alleleType in Constants.CoverageContributingAlleles)
                {
                    allele.TotalCoverageByDirection[direction] += alleleCountSource.GetAlleleCount(allele.Coordinate, alleleType, (DirectionType)direction);

                    if (alleleType != AlleleHelper.GetAlleleType(allele.Reference))
                    {
                        continue;
                    }
                    if (variant != null)
                    {
                        variant.ReferenceSupport += alleleCountSource.GetAlleleCount(variant.Coordinate, alleleType,
                                                                                     (DirectionType)direction);
                    }
                }

                allele.TotalCoverage += allele.TotalCoverageByDirection[direction];

                allele.NumNoCalls += alleleCountSource.GetAlleleCount(allele.Coordinate, AlleleType.N, (DirectionType)direction);
            }

            // adjust for reference counts already taken up by gapped mnvs
            // The gapped-mnv ref count can exceed the support accumulated above, so the result is capped
            // at 0 to keep support counts from going negative.
            var gappedRefCounts = alleleCountSource.GetGappedMnvRefCount(allele.Coordinate);

            if (allele.Type == AlleleCategory.Snv && variant != null)
            {
                variant.ReferenceSupport = System.Math.Max(0, variant.ReferenceSupport - gappedRefCounts);
            }
            else if (allele.Type == AlleleCategory.Reference)
            {
                allele.AlleleSupport = System.Math.Max(0, allele.AlleleSupport - gappedRefCounts);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Maps a called allele through <c>AlleleHelper.Map</c> and checks that chromosome, coordinate,
        /// reference, alternate, type and per-direction support are carried over unchanged.
        /// </summary>
        public void MapToCandidateAllele()
        {
            var allele = new CalledAllele
            {
                Chromosome         = "chr1",
                Coordinate         = 1,
                Reference          = "A",
                Alternate          = "T",
                Type               = AlleleCategory.Snv,
                SupportByDirection = new[] { 10, 20, 30 }
            };

            var mappedAllele = AlleleHelper.Map(allele);

            // Assert.Equal takes (expected, actual): the input allele is the expectation, the mapped one is under test.
            Assert.Equal(allele.Chromosome, mappedAllele.Chromosome);
            Assert.Equal(allele.Coordinate, mappedAllele.Coordinate);
            Assert.Equal(allele.Reference, mappedAllele.Reference);
            Assert.Equal(allele.Alternate, mappedAllele.Alternate);
            Assert.Equal(allele.Type, mappedAllele.Type);
            Assert.Equal(allele.SupportByDirection.Count(), mappedAllele.SupportByDirection.Count());
            for (int i = 0; i < allele.SupportByDirection.Count(); i++)
            {
                Assert.Equal(allele.SupportByDirection[i], mappedAllele.SupportByDirection[i]);
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Maps a candidate allele through <c>AlleleHelper.Map</c>, first as an SNV and then re-typed as a
        /// reference, and checks each time that chromosome, coordinate, reference, alternate, type and
        /// per-direction support are carried over unchanged.
        /// </summary>
        public void MapToBaseCalledAllele()
        {
            //Called variant
            var allele = new CandidateAllele("chr1", 1, "A", "G", AlleleCategory.Snv);

            allele.SupportByDirection = new[] { 10, 20, 30 };
            var calledVariant = AlleleHelper.Map(allele);

            // Assert.Equal / Assert.NotEqual take (expected, actual): the input candidate is the expectation.
            Assert.NotEqual(AlleleCategory.Reference, calledVariant.Type);
            Assert.Equal(allele.Chromosome, calledVariant.Chromosome);
            Assert.Equal(allele.Coordinate, calledVariant.Coordinate);
            Assert.Equal(allele.Reference, calledVariant.Reference);
            Assert.Equal(allele.Alternate, calledVariant.Alternate);
            Assert.Equal(allele.Type, calledVariant.Type);
            Assert.Equal(allele.SupportByDirection.Count(), calledVariant.SupportByDirection.Count());
            for (int i = 0; i < allele.SupportByDirection.Count(); i++)
            {
                Assert.Equal(allele.SupportByDirection[i], calledVariant.SupportByDirection[i]);
            }

            //Called reference
            // The same candidate is re-typed in place and mapped a second time.
            allele.Type = AlleleCategory.Reference;
            var calledReference = AlleleHelper.Map(allele);

            Assert.Equal(AlleleCategory.Reference, calledReference.Type);
            Assert.Equal(allele.Chromosome, calledReference.Chromosome);
            Assert.Equal(allele.Coordinate, calledReference.Coordinate);
            Assert.Equal(allele.Reference, calledReference.Reference);
            Assert.Equal(allele.Alternate, calledReference.Alternate);
            Assert.Equal(allele.Type, calledReference.Type);
            Assert.Equal(allele.SupportByDirection.Count(), calledReference.SupportByDirection.Count());
            for (int i = 0; i < allele.SupportByDirection.Count(); i++)
            {
                Assert.Equal(allele.SupportByDirection[i], calledReference.SupportByDirection[i]);
            }
        }
        /// <summary>
        /// Walks the read base by base and records, per covered reference position, an allele count,
        /// a collapsed-read count, an amplicon count and a base-quality contribution. Reference positions
        /// skipped between mapped bases, and deletions that terminate the read (optionally followed by a
        /// soft clip), are recorded as Deletion counts when they pass the deletion quality check.
        /// Optionally stores the read's coverage summary.
        /// </summary>
        /// <param name="alignment">The read whose bases are tallied.</param>
        public void AddAlleleCounts(Read alignment)
        {
            // Remember the read length of the first read seen.
            if (!_readLength.HasValue)
            {
                _readLength = alignment.ReadLength;
            }

            var lastPosition = alignment.Position - 1;

            var cigarData = alignment.CigarData;

            var deletionLength               = 0;
            var lengthBeforeDeletion         = alignment.ReadLength;
            // NOTE(review): the trailing 'true' presumably makes the op index count from the end of the CIGAR - confirm.
            var endsInDeletion               = cigarData.HasOperationAtOpIndex(0, 'D', true);
            var endsInDeletionBeforeSoftclip = cigarData.HasOperationAtOpIndex(1, 'D', true) && cigarData.HasOperationAtOpIndex(0, 'S', true);

            if (endsInDeletion || endsInDeletionBeforeSoftclip)
            {
                // Deletion length comes from the second-to-last op when a soft clip follows it, otherwise from the last op.
                deletionLength = (int)(endsInDeletionBeforeSoftclip ? cigarData[cigarData.Count - 2].Length :
                                       cigarData[cigarData.Count - 1].Length);
                // With a trailing soft clip, the bases of the last op are excluded from the pre-deletion length.
                lengthBeforeDeletion = (int)(endsInDeletionBeforeSoftclip ? alignment.ReadLength - cigarData[cigarData.Count - 1].Length :  alignment.ReadLength);
            }

            var positionMapLength      = alignment.PositionMap.Length;
            var alignmentEndPosition   = alignment.EndPosition;
            var alignmentStartPosition = alignment.Position;

            for (var positionMapIndex = 0; positionMapIndex < positionMapLength; positionMapIndex++)
            {
                DirectionType directionType = alignment.SequencedBaseDirectionMap[positionMapIndex];


                // Reached the first base after the deletion that sits in front of the trailing soft clip.
                if ((endsInDeletionBeforeSoftclip) && positionMapIndex == lengthBeforeDeletion)
                {
                    if (CandidateVariantFinder.CheckDeletionQuality(alignment, positionMapIndex, _minBasecallQuality))
                    {
                        for (var j = 1; j < deletionLength + 1; j++) // add any terminal deletion counts
                        {
                            var anchorIndex = NumAnchorIndexes - 1;  // Last index
                            AddAlleleCount(j + lastPosition, AlleleType.Deletion, directionType, anchorIndex);
                            AddCollapsedReadCount(j + lastPosition, alignment, directionType);
                        }
                    }
                }

                var position = alignment.PositionMap.GetPositionAtIndex(positionMapIndex);

                if (position == -1)
                {
                    continue; // not mapped to reference
                }

                var anchorType = GetAnchorType(alignmentEndPosition, position, alignmentStartPosition);

                //if the deletion is of decent quality, add it to the counts matrix
                if (CandidateVariantFinder.CheckDeletionQuality(alignment, positionMapIndex, _minBasecallQuality))
                {
                    // Reference positions skipped since the previous mapped base are deleted in this read.
                    for (var j = lastPosition + 1; j < position; j++) // add any deletion counts
                    {
                        AddAlleleCount(j, AlleleType.Deletion, directionType, anchorType);
                        AddCollapsedReadCount(j, alignment, directionType);
                    }
                }

                var alleleType = AlleleHelper.GetAlleleType(alignment.Sequence[positionMapIndex]);
                if (alignment.Qualities[positionMapIndex] < _minBasecallQuality)
                {
                    alleleType = AlleleType.N; // record this event as a no call
                }
                AddAlleleCount(position, alleleType, directionType, anchorType);

                // No-calls do not contribute to collapsed-read or amplicon counts.
                if (alleleType != AlleleType.N)
                {
                    AddCollapsedReadCount(position, alignment, directionType);
                    AddAmpliconCount(_trackAmpliconCounts, position, alignment.GetAmpliconNameIfExists());
                }

                // Base quality Q is recorded as 10^(-Q/10) (presumably the Phred error probability - confirm).
                AddAlleleBaseQuality(position, alleleType, directionType, Math.Pow(10, -1 * (int)alignment.Qualities[positionMapIndex] / 10f), anchorType);
                lastPosition = position;
            }

            // Deletion at the very end of the read: quality-checked and counted using the last base's index and direction.
            if (endsInDeletion)
            {
                if (CandidateVariantFinder.CheckDeletionQuality(alignment, alignment.SequencedBaseDirectionMap.Length - 1, _minBasecallQuality))
                {
                    for (var j = 1; j < deletionLength + 1; j++) // add any terminal deletion counts
                    {
                        DirectionType directionType =
                            alignment.SequencedBaseDirectionMap[alignment.SequencedBaseDirectionMap.Length - 1];

                        var anchorIndex = NumAnchorIndexes - 1;                                            // Last index

                        AddAlleleCount(j + lastPosition, AlleleType.Deletion, directionType, anchorIndex); // ends in deletion -> within 1
                        AddCollapsedReadCount(j + lastPosition, alignment, directionType);
                    }
                }
            }

            // add coverage summary
            if (_trackReadSummaries)
            {
                var coverageSummary = alignment.GetCoverageSummary();
                var block           = GetBlock(coverageSummary.ClipAdjustedEndPosition);
                // store by end position so we can always be forward looking
                block.AddReadSummary(coverageSummary.ClipAdjustedEndPosition, coverageSummary);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Turns candidate alleles into called alleles grouped by coordinate.
        /// </summary>
        /// <remarks>
        /// Steps, in order: optional collapsing of candidates; mapping to called alleles (MNVs are processed
        /// immediately and set aside if not callable); reallocation of failed MNVs, with leftovers handed back
        /// to <paramref name="source"/> as candidates; registration of gapped-MNV reference counts; a
        /// re-processing pass that keeps only callable, reportable alleles; and finally per-position
        /// reference pruning, genotyping, low genotype-quality filtering and sorting by reference then alternate.
        /// </remarks>
        /// <param name="candidates">Candidate alleles to evaluate.</param>
        /// <param name="source">Allele source used for processing; also receives leftover candidates and gapped-MNV ref counts.</param>
        /// <param name="maxPosition">Upper position bound passed to the collapser and the MNV reallocator; may be null.</param>
        /// <returns>Called alleles keyed by coordinate.</returns>
        private SortedList <int, List <CalledAllele> > CallForPositions(List <CandidateAllele> candidates, IAlleleSource source, int?maxPosition)
        {
            if (_collapser != null)
            {
                candidates = _collapser.Collapse(candidates.ToList(), source, maxPosition);
            }

            var rejectedMnvs    = new List <CalledAllele>();
            var allelesToAssess = new List <CalledAllele>();

            foreach (var candidate in candidates)
            {
                var mapped = AlleleHelper.Map(candidate);

                // Non-MNVs go straight through; they are processed in the second pass below.
                if (mapped.Type != AlleleCategory.Mnv)
                {
                    allelesToAssess.Add(mapped);
                    continue;
                }

                // MNVs are evaluated up front so the ones that fail can be reallocated.
                ProcessVariant(source, mapped);
                if (IsCallable(mapped))
                {
                    allelesToAssess.Add(mapped);
                }
                else
                {
                    rejectedMnvs.Add(mapped);
                }
            }

            var leftoversInNextBlock = MnvReallocator.ReallocateFailedMnvs(rejectedMnvs, allelesToAssess, maxPosition);

            source.AddCandidates(leftoversInNextBlock.Select(AlleleHelper.Map));

            source.AddGappedMnvRefCount(GetRefSupportFromGappedMnvs(allelesToAssess));

            var calledByPosition = new SortedList <int, List <CalledAllele> >();

            // need to re-process variants since they may have additional support
            foreach (var called in allelesToAssess)
            {
                ProcessVariant(source, called);

                if (!IsCallable(called) || !ShouldReport(called))
                {
                    continue;
                }

                List <CalledAllele> bucket;
                if (!calledByPosition.TryGetValue(called.Coordinate, out bucket))
                {
                    bucket = new List <CalledAllele>();
                    calledByPosition.Add(called.Coordinate, bucket);
                }

                bucket.Add(called);
            }

            // re-process variants by loci to get GT (to potentially take into account multiple var alleles at same loci)
            // and prune allele lists as needed.
            foreach (var allelesAtPosition in calledByPosition.Values)
            {
                // Reference calls are dropped whenever a non-reference call exists at the same position.
                var hasNonReference = allelesAtPosition.Any(v => v.Type != AlleleCategory.Reference);
                if (hasNonReference)
                {
                    allelesAtPosition.RemoveAll(v => (v.Type == AlleleCategory.Reference));
                }

                //set GT and GT score, and prune any variant calls that exceed the ploidy model
                var allelesToPrune = _genotypeCalculator.SetGenotypes(allelesAtPosition);

                foreach (var alleleToPrune in allelesToPrune)
                {
                    allelesAtPosition.Remove(alleleToPrune);
                }

                // Flag survivors whose genotype quality is below the configured filter, if one is set.
                foreach (var allele in allelesAtPosition)
                {
                    if (_config.LowGTqFilter.HasValue && allele.GenotypeQscore < _config.LowGTqFilter)
                    {
                        allele.AddFilter(FilterType.LowGenotypeQuality);
                    }
                }

                // Order by reference, breaking ties on alternate.
                allelesAtPosition.Sort((left, right) =>
                {
                    var byReference = left.Reference.CompareTo(right.Reference);
                    if (byReference != 0)
                    {
                        return byReference;
                    }

                    return left.Alternate.CompareTo(right.Alternate);
                });
            }

            return calledByPosition;
        }
Esempio n. 11
0
        // TODO revisit using anchor info to make deletion coverage more accurate, when we have the time...
        //private void CalculateDeletionCoverage(CalledAllele variant, IAlleleSource alleleCountSource,
        //    int startPointPosition, int endPointPosition, bool presumeAnchoredForExactCov = true)
        //{
        //    var startPointCoverage = new[] { 0, 0, 0 };
        //    var endPointCoverage = new[] { 0, 0, 0 };
        //    var exactTotalCoverage = 0f;

        //    var variantLength = variant.Length;

        //    for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
        //    {
        //        var minAnchor = variantLength + 1;
        //        var startPointCoverageForDirectionTotal = 0;
        //        var endPointCoverageForDirectionTotal = 0;
        //        foreach (var alleleType in Constants.CoverageContributingAlleles)
        //        {
        //            var startPointCoverageForDirection = alleleCountSource.GetAlleleCount(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, symmetric: true);
        //            var endPointCoverageForDirection = alleleCountSource.GetAlleleCount(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, fromEnd: true, symmetric: true);

        //            startPointCoverageForDirectionTotal += startPointCoverageForDirection;
        //            endPointCoverageForDirectionTotal += endPointCoverageForDirection;

        //            startPointCoverage[directionIndex] += startPointCoverageForDirection;
        //            endPointCoverage[directionIndex] += endPointCoverageForDirection;

        //            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchor);
        //            variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchor, fromEnd: true);
        //        }
        //    }

        //    // coverage by strand direction is used for strand bias.  need to redistribute stitched contribution to forward and reverse directions for book-ends before reconciling them.
        //    RedistributeStitchedCoverage(startPointCoverage);
        //    RedistributeStitchedCoverage(endPointCoverage);

        //    // intentionally leave stitched coverage empty when calculating for a spanned variant (it's already been redistributed)
        //    for (var directionIndex = 0; directionIndex < 2; directionIndex++)
        //    {
        //        var exactCoverageForDir = presumeAnchoredForExactCov ? ((startPointCoverage[directionIndex] + endPointCoverage[directionIndex])) / 2f : //will always round to lower.
        //            Math.Min(startPointCoverage[directionIndex], endPointCoverage[directionIndex]);
        //        variant.EstimatedCoverageByDirection[directionIndex] = (int)exactCoverageForDir;

        //        exactTotalCoverage += exactCoverageForDir;
        //    }

        //    //for extended variants, coverage is not an exact value.
        //    //Its an estimate based on the depth over the length of the variant.
        //    //In particular, the depth by direction does not always allocate neatly to an integer value.

        //    //ie, variant.TotalCoverage != variant.EstimatedCoverageByDirection[directionIndex].Sum

        //    variant.TotalCoverage = (int)exactTotalCoverage;
        //    variant.ReferenceSupport = Math.Max(0, variant.TotalCoverage - variant.AlleleSupport);

        //}

        /// <summary>
        /// Calculation for spanning variants requires looking at two datapoints and reconciling the coverage between the two.
        /// For insertions, take min of preceding and trailing datapoints.
        /// For deletions and mnvs, take average of first and last datapoint for variant.
        /// jg todo - figure out this old comment - (Or if we're at the edge of the world, give up and just take the coverage of the left base)
        /// </summary>
        protected virtual void CalculateSpanning(CalledAllele variant, IAlleleSource alleleCountSource, int startPointPosition, int endPointPosition, bool presumeAnchoredForExactCov = true)
        {
            // TODO come back to this - now that we are tracking coverage more tightly we may be able to improve deletion spanning read count estimates as well
            //if (variant.Type == AlleleCategory.Deletion)
            //{
            //    CalculateDeletionCoverage(variant, alleleCountSource, startPointPosition, endPointPosition,
            //        presumeAnchoredForExactCov);
            //    return;
            //}

            //empty arrays to do our coverage calculations.  the three spaces are for each read direction.
            var startPointCoverage = new[] { 0, 0, 0 };
            var endPointCoverage   = new[] { 0, 0, 0 };
            var exactTotalCoverage = 0f;

            var confidentCoverageLeft   = 0;
            var confidentCoverageRight  = 0;
            var suspiciousCoverageLeft  = 0;
            var suspiciousCoverageRight = 0;

            var firstBase           = AlleleType.N;
            var lastBase            = AlleleType.N;
            var bePickyAboutAnchors = _considerAnchorInformation && variant.Type == AlleleCategory.Insertion;

            if (bePickyAboutAnchors)
            {
                var firstBaseChar = variant.AlternateAllele[1];
                firstBase = AlleleHelper.GetAlleleType(firstBaseChar);

                var lastBaseChar = variant.AlternateAllele[variant.AlternateAllele.Length - 1];
                lastBase = AlleleHelper.GetAlleleType(lastBaseChar);
            }

            var startPointCoverageUnanchored   = new[] { 0, 0, 0 };
            var endPointCoverageUnanchored     = new[] { 0, 0, 0 };
            var unanchoredCoverageStartQuality = 0D;
            var unanchoredCoverageEndQuality   = 0D;

            var unanchoredSupport = variant.AlleleSupport - variant.WellAnchoredSupport;

            // Track the relative coverages of each and then go back and use this to determine the weighting factor

            for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
            {
                foreach (var alleleType in Constants.CoverageContributingAlleles)
                {
                    var anchoredCoverageOnlyEnd   = bePickyAboutAnchors && alleleType == firstBase;
                    var anchoredCoverageOnlyStart = bePickyAboutAnchors && alleleType == lastBase;

                    var minAnchorEnd   = anchoredCoverageOnlyEnd ? variant.Length : 0;
                    var minAnchorStart = anchoredCoverageOnlyStart ? variant.Length : 0;

                    var startPointCoverageForDirection = alleleCountSource.GetAlleleCount(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchorStart);
                    startPointCoverage[directionIndex] += startPointCoverageForDirection;
                    var endPointCoverageForDirection = alleleCountSource.GetAlleleCount(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchorEnd, fromEnd: true);;
                    endPointCoverage[directionIndex] += endPointCoverageForDirection;

                    confidentCoverageLeft  += startPointCoverageForDirection;
                    confidentCoverageRight += endPointCoverageForDirection;

                    variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, minAnchorStart);
                    variant.SumOfBaseQuality += alleleCountSource.GetSumOfAlleleBaseQualities(endPointPosition, alleleType, (DirectionType)directionIndex, minAnchorEnd, fromEnd: true);

                    if (bePickyAboutAnchors && unanchoredSupport > 0) // Shortcut - if the unanchored support is 0 anyway, we're going to use 0 as our weight here and there's no point collecting this info
                    {
                        if (minAnchorStart > 0)
                        {
                            var unanchoredCoverageStartCount = alleleCountSource.GetAlleleCount(startPointPosition,
                                                                                                alleleType, (DirectionType)directionIndex, 0, maxAnchor: minAnchorStart - 1);
                            startPointCoverageUnanchored[directionIndex] += unanchoredCoverageStartCount;

                            suspiciousCoverageLeft += unanchoredCoverageStartCount;

                            // Need to adjust the windowed base qualities as well
                            unanchoredCoverageStartQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, 0, maxAnchor: minAnchorStart - 1);
                        }

                        if (minAnchorEnd > 0)
                        {
                            var unanchoredCoverageEndCount = alleleCountSource.GetAlleleCount(endPointPosition,
                                                                                              alleleType, (DirectionType)directionIndex, 0, fromEnd: true,
                                                                                              maxAnchor: minAnchorEnd - 1);
                            endPointCoverageUnanchored[directionIndex] += unanchoredCoverageEndCount;

                            suspiciousCoverageRight += unanchoredCoverageEndCount;

                            // Need to adjust the windowed base qualities as well
                            unanchoredCoverageEndQuality += alleleCountSource.GetSumOfAlleleBaseQualities(startPointPosition, alleleType, (DirectionType)directionIndex, 0, fromEnd: true,
                                                                                                          maxAnchor: minAnchorEnd - 1);
                        }
                    }
                }
            }

            if (bePickyAboutAnchors)
            {
                var trulyAnchoredCoverage = (((confidentCoverageLeft - suspiciousCoverageRight) +
                                              (confidentCoverageRight - suspiciousCoverageLeft)) / 2f);

                var anchoredVariantFreq =
                    trulyAnchoredCoverage <= 0 ? 0 : variant.WellAnchoredSupport / trulyAnchoredCoverage;

                var totalSuspiciousCoverage =
                    suspiciousCoverageLeft +
                    suspiciousCoverageRight; // Suspicious coverages are not likely to be from the same sources, so add rather than average
                var unanchoredVariantFreq = totalSuspiciousCoverage == 0
                    ? 0
                    : unanchoredSupport / ((float)totalSuspiciousCoverage);
                var variantSpecificUnanchoredWeight = Math.Max(0, anchoredVariantFreq == 0
                    ? 1
                    : Math.Min(1, unanchoredVariantFreq / anchoredVariantFreq));
                variant.UnanchoredCoverageWeight = variantSpecificUnanchoredWeight;

                for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                {
                    startPointCoverage[directionIndex] +=
                        (int)(startPointCoverageUnanchored[directionIndex] * variantSpecificUnanchoredWeight);
                    endPointCoverage[directionIndex] +=
                        (int)(endPointCoverageUnanchored[directionIndex] * variantSpecificUnanchoredWeight);

                    // GB: this will keep us consisent with how we were doing it before, but I find it rather odd that we're ADDING base quality from both sides and ultimately in ProcessVariant dividing that sum by the total coverage which is an average, not a sum, of each side's coverage.
                    // Since we are dividing the total q score by the tot cov i didn't want it to get inflated by reducing the tot cov, so adjusted by the same facto
                    // TJD response: Base quality is a log of a p value, so averaging them is not the same as summing them then dividing. If you have a bunch of Qscores, say 10 and 10 and 100, you DO NOT do (10+ 10+100)/3. You have to do Q10-> p 0.1 and 100 -> p 0.01 so avg(0.1,0.1,0.01) is ~ .2/3 = 0.0666 -> a Q of (what ever that ends up being)... just a computational trick, to do it in log space instead of normal space
                    variant.SumOfBaseQuality += unanchoredCoverageStartQuality * variantSpecificUnanchoredWeight;
                    variant.SumOfBaseQuality += unanchoredCoverageEndQuality * variantSpecificUnanchoredWeight;
                }
            }

            // coverage by strand direction is used for strand bias.  need to redistribute stitched contribution to forward and reverse directions for book-ends before reconciling them.
            RedistributeStitchedCoverage(startPointCoverage);
            RedistributeStitchedCoverage(endPointCoverage);

            // intentionally leave stitched coverage empty when calculating for a spanned variant (it's already been redistributed)
            for (var directionIndex = 0; directionIndex < 2; directionIndex++)
            {
                var exactCoverageForDir = presumeAnchoredForExactCov ? ((startPointCoverage[directionIndex] + endPointCoverage[directionIndex])) / 2f :   //will always round to lower.
                                          Math.Min(startPointCoverage[directionIndex], endPointCoverage[directionIndex]);
                variant.EstimatedCoverageByDirection[directionIndex] = (int)exactCoverageForDir;

                exactTotalCoverage += exactCoverageForDir;
            }

            //for extended variants, coverage is not an exact value.
            //Its an estimate based on the depth over the length of the variant.
            //In particular, the depth by direction does not always allocate neatly to an integer value.

            //ie, variant.TotalCoverage != variant.EstimatedCoverageByDirection[directionIndex].Sum

            variant.TotalCoverage           = (int)exactTotalCoverage;
            variant.ReferenceSupport        = Math.Max(0, variant.TotalCoverage - variant.AlleleSupport);
            variant.SuspiciousCoverageStart = suspiciousCoverageLeft;
            variant.ConfidentCoverageStart  = confidentCoverageLeft;
            variant.SuspiciousCoverageEnd   = suspiciousCoverageRight;
            variant.ConfidentCoverageEnd    = confidentCoverageRight;
        }
Esempio n. 12
0
        /// <summary>
        /// Collects every stored candidate variant and, when requested (or when forced
        /// genotyping alleles are supplied), a reference allele for each position of the
        /// regions being fetched.
        /// </summary>
        /// <param name="includeRefAlleles">When true, reference alleles are generated using <paramref name="intervals"/>.</param>
        /// <param name="chrReference">Reference supplying the chromosome name and sequence.</param>
        /// <param name="intervals">Optional intervals; used only when <paramref name="includeRefAlleles"/> is true.</param>
        /// <param name="forcesGtAlleles">Optional forced alleles; when <paramref name="includeRefAlleles"/> is false they are handed to <c>CreateIntervalsFromAllels</c> to derive the intervals.</param>
        /// <returns>The stored candidates followed by any generated reference alleles.</returns>
        public List <CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference,
                                                       ChrIntervalSet intervals = null, HashSet <Tuple <string, int, string, string> > forcesGtAlleles = null)
        {
            var collected = new List<CandidateAllele>();

            // Every stored candidate is a potential collapse target, so all of them are returned.
            foreach (var candidatesAtPosition in _candidateVariantsLookup)
            {
                if (candidatesAtPosition == null)
                {
                    continue;
                }

                collected.AddRange(candidatesAtPosition);
            }

            var activeIntervals = includeRefAlleles ? intervals : CreateIntervalsFromAllels(chrReference, forcesGtAlleles);

            var hasForcedAlleles = forcesGtAlleles != null && forcesGtAlleles.Count != 0;
            if (!includeRefAlleles && !hasForcedAlleles)
            {
                return(collected);
            }

            // Without intervals the whole block region is fetched; otherwise the intervals are clipped to this block.
            var regions = activeIntervals == null
                ? new List <Region> { this }
                : activeIntervals.GetClipped(this);

            foreach (var region in regions)
            {
                for (var position = region.StartPosition; position <= region.EndPosition; position++)
                {
                    // Stop once the region runs past the end of the reference sequence.
                    if (position > chrReference.Sequence.Length)
                    {
                        break;
                    }

                    var offsetInBlock      = position - StartPosition;
                    var referenceBase      = chrReference.Sequence[position - 1].ToString();
                    var referenceTypeIndex = (int)AlleleHelper.GetAlleleType(referenceBase);
                    var referenceAllele    = new CandidateAllele(chrReference.Name, position,
                                                                 referenceBase, referenceBase, AlleleCategory.Reference);

                    // Sum the counts of every allele type at this position; only the reference type's
                    // counts are recorded on the reference allele itself.
                    var supportAcrossAlleles = 0;

                    for (var alleleTypeIndex = 0; alleleTypeIndex < Constants.NumAlleleTypes; alleleTypeIndex++)
                    {
                        var isReferenceType = alleleTypeIndex == referenceTypeIndex;

                        for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                        {
                            var directionalCount = 0;
                            for (int anchorIndex = 0; anchorIndex < NumAnchorIndexes; anchorIndex++)
                            {
                                directionalCount += _alleleCounts[offsetInBlock, alleleTypeIndex, directionIndex, anchorIndex];
                            }

                            if (isReferenceType)
                            {
                                // TODO: this support is neither proven well-anchored nor proven otherwise,
                                // so WellAnchoredSupportByDirection is deliberately left untouched.
                                referenceAllele.SupportByDirection[directionIndex] = directionalCount;
                            }

                            supportAcrossAlleles += directionalCount;
                        }
                    }

                    // Zero-coverage reference positions are only emitted when intervals are in use.
                    if (activeIntervals != null || supportAcrossAlleles > 0)
                    {
                        collected.Add(referenceAllele);
                    }
                }
            }

            return(collected);
        }
Esempio n. 13
0
        /// <summary>
        /// Maps the candidates to called alleles, processes them against <paramref name="source"/>,
        /// reallocates MNVs that are not callable, and returns the alleles that should be reported,
        /// grouped by reference position.
        /// </summary>
        /// <param name="candidates">Candidates to evaluate; collapsed first when a collapser is configured.</param>
        /// <param name="source">Allele source used for processing and for receiving leftovers.</param>
        /// <param name="maxPosition">Passed through to the collapser and to the MNV reallocator.</param>
        /// <returns>Called alleles keyed by their reference position.</returns>
        private SortedList <int, List <CalledAllele> > CallForPositions(List <CandidateAllele> candidates, IAlleleSource source, int?maxPosition)
        {
            if (_collapser != null)
            {
                candidates = _collapser.Collapse(candidates.ToList(), source, maxPosition);
            }

            var rejectedMnvs = new List<CalledAllele>();
            var callable     = new List<CalledAllele>();

            foreach (var candidate in candidates)
            {
                var mapped = AlleleHelper.Map(candidate);

                // Anything that is not an MNV goes straight through; it is processed later.
                if (mapped.Type != AlleleCategory.Mnv)
                {
                    callable.Add(mapped);
                    continue;
                }

                // MNVs are processed up front so that the non-callable ones can be reallocated.
                ProcessVariant(source, mapped);
                var destination = IsCallable(mapped) ? callable : rejectedMnvs;
                destination.Add(mapped);
            }

            var carriedToNextBlock = MnvReallocator.ReallocateFailedMnvs(rejectedMnvs, callable, maxPosition);

            source.AddCandidates(carriedToNextBlock.Select(AlleleHelper.Map));

            source.AddGappedMnvRefCount(GetRefSupportFromGappedMnvs(callable));

            // A failed MNV that was injected as a forced-GT variant is spiked back in
            // so that it still gets reported to the VCF.
            callable.AddRange(rejectedMnvs.Where(rejected => IsForcedAllele(rejected)));

            var byPosition = new SortedList<int, List<CalledAllele>>();

            // Variants are processed again here because they may have gained support.
            foreach (var allele in callable)
            {
                ProcessVariant(source, allele);

                if (IsForcedAllele(allele) && (!IsCallable(allele) || !ShouldReport(allele)))
                {
                    allele.IsForcedToReport = true;
                    allele.AddFilter(FilterType.ForcedReport);
                }

                var keep = (IsCallable(allele) && ShouldReport(allele)) || IsForcedAllele(allele);
                if (!keep)
                {
                    continue;
                }

                List<CalledAllele> bucket;
                if (!byPosition.TryGetValue(allele.ReferencePosition, out bucket))
                {
                    bucket = new List<CalledAllele>();
                    byPosition.Add(allele.ReferencePosition, bucket);
                }

                bucket.Add(allele);
            }

            // Genotyping runs per locus so that several variant alleles at one position can be
            // considered together; the per-locus lists may be pruned in the process.
            foreach (var locusAlleles in byPosition.Values)
            {
                ComputeGenotypeAndFilterAllele(locusAlleles);
                _locusProcessor.Process(locusAlleles);
            }

            return(byPosition);
        }
Esempio n. 14
0
        /// <summary>
        /// Walks the bases of one alignment operation, comparing read bases with the reference,
        /// and collects the candidate alleles emitted by <c>FlushVariant</c>. Runs of bases are
        /// accumulated into a single variant for as long as <c>ShouldBuildUpMNV</c> allows.
        /// </summary>
        /// <param name="alignment">Read supplying the sequence and base qualities.</param>
        /// <param name="refChromosome">Reference sequence the read bases are compared against.</param>
        /// <param name="opStartIndexInRead">Zero-based index in the read where the operation starts.</param>
        /// <param name="operationLength">Number of bases covered by the operation.</param>
        /// <param name="opStartIndexInReference">Zero-based index in <paramref name="refChromosome"/> where the operation starts.</param>
        /// <param name="chromosomeName">Chromosome name passed through to <c>FlushVariant</c>.</param>
        /// <returns>The candidates collected for this operation.</returns>
        private IEnumerable <CandidateAllele> ExtractSnvsFromOperation(Read alignment, string refChromosome, int opStartIndexInRead, uint operationLength, int opStartIndexInReference, string chromosomeName)
        {
            var candidateSingleNucleotideAlleles = new List <CandidateAllele>();
            var variantLengthSoFar        = 0;
            var interveningRefLengthSoFar = 0;

            // The index lives outside the loop so the final flush can use the number of bases that
            // were actually examined. The loop may stop early when the operation runs past the end
            // of the reference; anchoring the final flush on operationLength in that case would place
            // a pending variant at the wrong read/reference offsets.
            var i = 0;

            for (; i < operationLength; i++)
            {
                var qualityGoodEnough = alignment.Qualities[opStartIndexInRead + i] >= _minimumBaseCallQuality;

                var readBase = alignment.Sequence[opStartIndexInRead + i];
                if (opStartIndexInReference + i >= refChromosome.Length)
                {
                    // Ran off the end of the reference: nothing further can be compared.
                    break;
                }
                var refBase = refChromosome[opStartIndexInReference + i];

                var atEndOfOperation            = i == (operationLength - 1);
                var startingMnvAtEndOfOperation = (atEndOfOperation && variantLengthSoFar == 0);

                //Do not create/extend a variant if the quality isn't good enough or the allele or ref is an N
                if ((AlleleHelper.GetAlleleType(readBase) == AlleleType.N) || (AlleleHelper.GetAlleleType(refBase) == AlleleType.N) || !qualityGoodEnough)
                {
                    FlushVariant(alignment, refChromosome, opStartIndexInRead + i - variantLengthSoFar,
                                 opStartIndexInReference + i - variantLengthSoFar, chromosomeName, variantLengthSoFar,
                                 interveningRefLengthSoFar, candidateSingleNucleotideAlleles);
                    variantLengthSoFar        = 0;
                    interveningRefLengthSoFar = 0;
                }
                else
                {
                    if (BasesMatch(refBase, readBase))
                    {
                        // A matching base either extends the pending variant as intervening
                        // reference, or ends it.
                        if (ShouldBuildUpMNV(variantLengthSoFar, interveningRefLengthSoFar, true) &&
                            !startingMnvAtEndOfOperation) //Don't build up an MNV if we're on the last base of operation
                        {
                            variantLengthSoFar++;
                            interveningRefLengthSoFar++;
                        }
                        else
                        {
                            FlushVariant(alignment, refChromosome, opStartIndexInRead + i - variantLengthSoFar,
                                         opStartIndexInReference + i - variantLengthSoFar, chromosomeName, variantLengthSoFar,
                                         interveningRefLengthSoFar, candidateSingleNucleotideAlleles);
                            variantLengthSoFar        = 0;
                            interveningRefLengthSoFar = 0;
                        }
                    }
                    else
                    {
                        // A mismatching base either extends the pending variant, or flushes it
                        // and starts a new one-base variant at the current position.
                        if (ShouldBuildUpMNV(variantLengthSoFar, interveningRefLengthSoFar, false) &&
                            !startingMnvAtEndOfOperation) //Don't build up an MNV if we're on the last base of operation
                        {
                            variantLengthSoFar++;
                            interveningRefLengthSoFar = 0;
                        }
                        else
                        {
                            FlushVariant(alignment, refChromosome, opStartIndexInRead + i - variantLengthSoFar,
                                         opStartIndexInReference + i - variantLengthSoFar,
                                         chromosomeName, variantLengthSoFar, interveningRefLengthSoFar,
                                         candidateSingleNucleotideAlleles);
                            variantLengthSoFar        = 1;
                            interveningRefLengthSoFar = 0;
                        }
                    }
                }
            }

            // Flush whatever is still pending. i equals operationLength after a full pass, or the
            // index at which the loop stopped when the reference ended early.
            FlushVariant(alignment, refChromosome, opStartIndexInRead + i - variantLengthSoFar,
                         opStartIndexInReference + i - variantLengthSoFar, chromosomeName, variantLengthSoFar,
                         interveningRefLengthSoFar, candidateSingleNucleotideAlleles);

            return(candidateSingleNucleotideAlleles);
        }
Esempio n. 15
0
        /// <summary>
        /// Collects every stored candidate variant and, when <paramref name="includeRefAlleles"/>
        /// is set, a reference allele for each position of the regions being fetched.
        /// </summary>
        /// <param name="includeRefAlleles">When true, reference alleles are generated as well.</param>
        /// <param name="chrReference">Reference supplying the chromosome name and sequence.</param>
        /// <param name="intervals">Optional intervals; when null the whole block region is fetched.</param>
        /// <returns>The stored candidates followed by any generated reference alleles.</returns>
        public List <CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference,
                                                       ChrIntervalSet intervals = null)
        {
            var collected = new List<CandidateAllele>();

            // Every stored candidate is a potential collapse target, so all of them are returned.
            foreach (var candidatesAtPosition in _candidateVariantsLookup)
            {
                if (candidatesAtPosition == null)
                {
                    continue;
                }

                collected.AddRange(candidatesAtPosition);
            }

            if (!includeRefAlleles)
            {
                return(collected);
            }

            // Without intervals the whole block region is fetched; otherwise the intervals are clipped to this block.
            var regions = intervals == null
                ? new List <Region> { this }
                : intervals.GetClipped(this);

            foreach (var region in regions)
            {
                for (var position = region.StartPosition; position <= region.EndPosition; position++)
                {
                    // Stop once the region runs past the end of the reference sequence.
                    if (position > chrReference.Sequence.Length)
                    {
                        break;
                    }

                    var offsetInBlock      = position - StartPosition;
                    var referenceBase      = chrReference.Sequence[position - 1].ToString();
                    var referenceTypeIndex = (int)AlleleHelper.GetAlleleType(referenceBase);
                    var referenceAllele    = new CandidateAllele(chrReference.Name, position,
                                                                 referenceBase, referenceBase, AlleleCategory.Reference);

                    // Sum the counts of every allele type at this position; only the reference type's
                    // counts are recorded on the reference allele itself.
                    var supportAcrossAlleles = 0;

                    for (var alleleTypeIndex = 0; alleleTypeIndex < Constants.NumAlleleTypes; alleleTypeIndex++)
                    {
                        var isReferenceType = alleleTypeIndex == referenceTypeIndex;

                        for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                        {
                            var directionalCount = _alleleCounts[offsetInBlock, alleleTypeIndex, directionIndex];

                            if (isReferenceType)
                            {
                                referenceAllele.SupportByDirection[directionIndex] = directionalCount;
                            }

                            supportAcrossAlleles += directionalCount;
                        }
                    }

                    // Zero-coverage reference positions are only emitted when intervals were provided.
                    if (intervals != null || supportAcrossAlleles > 0)
                    {
                        collected.Add(referenceAllele);
                    }
                }
            }

            return(collected);
        }
Esempio n. 16
0
        /// <summary>
        /// Records allele counts and base-quality contributions for every mapped base of a read.
        /// Reference positions skipped between consecutive mapped bases are counted as deletions,
        /// as is a deletion that is the final CIGAR operation or that immediately precedes a
        /// trailing soft clip. Optionally stores the read's coverage summary.
        /// </summary>
        /// <param name="alignment">The read whose bases are counted.</param>
        public void AddAlleleCounts(Read alignment)
        {
            // The first read seen fixes the remembered read length.
            if (!_readLength.HasValue)
            {
                _readLength = alignment.ReadLength;
            }

            var cigar            = alignment.CigarData;
            var previousPosition = alignment.Position - 1;

            var deletionIsLastOp          = cigar.HasOperationAtOpIndex(0, 'D', true);
            var deletionPrecedesSoftclip  = cigar.HasOperationAtOpIndex(1, 'D', true) && cigar.HasOperationAtOpIndex(0, 'S', true);

            var terminalDeletionLength      = 0;
            var basesBeforeTerminalDeletion = alignment.ReadLength;

            if (deletionPrecedesSoftclip)
            {
                // ... D S : the deletion is the second-to-last op, the soft clip the last one.
                terminalDeletionLength      = (int)cigar[cigar.Count - 2].Length;
                basesBeforeTerminalDeletion = (int)(alignment.ReadLength - cigar[cigar.Count - 1].Length);
            }
            else if (deletionIsLastOp)
            {
                terminalDeletionLength = (int)cigar[cigar.Count - 1].Length;
            }

            for (var readIndex = 0; readIndex < alignment.PositionMap.Length; readIndex++)
            {
                // On reaching the first base after the terminal deletion (the start of the soft clip),
                // count the deleted positions that follow the last mapped base.
                if (deletionPrecedesSoftclip && readIndex == basesBeforeTerminalDeletion)
                {
                    for (var offset = 1; offset <= terminalDeletionLength; offset++)
                    {
                        AddAlleleCount(offset + previousPosition, AlleleType.Deletion, alignment.SequencedBaseDirectionMap[readIndex]);
                    }
                }

                var mappedPosition = alignment.PositionMap[readIndex];
                if (mappedPosition == -1)
                {
                    continue; // this base is not mapped to the reference
                }

                var direction = alignment.SequencedBaseDirectionMap[readIndex];

                // Any reference positions skipped since the previous mapped base count as deletions.
                for (var skipped = previousPosition + 1; skipped < mappedPosition; skipped++)
                {
                    AddAlleleCount(skipped, AlleleType.Deletion, direction);
                }

                var calledType = AlleleHelper.GetAlleleType(alignment.Sequence[readIndex]);
                if (alignment.Qualities[readIndex] < _minBasecallQuality)
                {
                    calledType = AlleleType.N; // below the quality cutoff: treat as a no call
                }

                // The quality is accumulated as 10^(-Q/10) rather than as the raw score.
                var qualityContribution = Math.Pow(10, -1 * (int)alignment.Qualities[readIndex] / 10f);

                AddAlleleCount(mappedPosition, calledType, direction);
                AddAlleleBaseQuality(mappedPosition, calledType, direction, qualityContribution);

                previousPosition = mappedPosition;
            }

            if (deletionIsLastOp)
            {
                // The read ends in a deletion: count it using the direction of the final base.
                for (var offset = 1; offset <= terminalDeletionLength; offset++)
                {
                    var directionMap = alignment.SequencedBaseDirectionMap;
                    AddAlleleCount(offset + previousPosition, AlleleType.Deletion, directionMap[directionMap.Length - 1]);
                }
            }

            if (_trackReadSummaries)
            {
                // Summaries are stored by end position so that lookups can always be forward looking.
                var summary     = alignment.GetCoverageSummary();
                var targetBlock = GetBlock(summary.ClipAdjustedEndPosition);
                targetBlock.AddReadSummary(summary.ClipAdjustedEndPosition, summary);
            }
        }