/// <summary>
/// Reports whether the given allele is listed in <c>ForcedGtAlleles</c>.
/// </summary>
/// <param name="baseCalledAllele">Allele whose chromosome, reference position, reference allele and alternate allele form the lookup key.</param>
/// <returns>
/// <c>false</c> when <c>ForcedGtAlleles</c> is null; otherwise whether it contains the
/// (chromosome, position, reference, alternate) tuple built from the allele.
/// </returns>
private bool IsForcedAllele(CalledAllele baseCalledAllele)
{
    // The tuple is only built when a forced-allele collection exists (short-circuit).
    return ForcedGtAlleles != null
        && ForcedGtAlleles.Contains(
            new Tuple<string, int, string, string>(
                baseCalledAllele.Chromosome,
                baseCalledAllele.ReferencePosition,
                baseCalledAllele.ReferenceAllele,
                baseCalledAllele.AlternateAllele));
}
/// <summary>
/// Returns the depth to report for a group of variants: the largest
/// <c>TotalCoverage</c> among them.
/// </summary>
/// <remarks>
/// This is not as obvious as it seems. What is the depth of a het-alt1-alt2 call
/// when you have two insertions of different length? The least controversial
/// answer is to take the maximum.
/// </remarks>
/// <param name="variants">Variants to inspect. May be empty.</param>
/// <returns>
/// The maximum <c>TotalCoverage</c> seen, never less than 0; 0 when
/// <paramref name="variants"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="variants"/> is null.</exception>
public int GetDepthCountInt(IEnumerable<CalledAllele> variants)
{
    if (variants == null)
    {
        throw new ArgumentNullException("variants");
    }

    // Single pass only: the sequence may be lazily evaluated, and an empty
    // sequence simply leaves the depth at 0.
    int totalDepth = 0;
    foreach (var variant in variants)
    {
        totalDepth = Math.Max(variant.TotalCoverage, totalDepth);
    }

    return totalDepth;
}
/// <summary>
/// Finalizes the call on a candidate allele: records the noise level, assigns a
/// placeholder strand-bias score, and turns the genotype into a no-call when the
/// variant Q-score or the frequency is below the caller's minimum.
/// </summary>
/// <param name="mnv">Allele to update in place.</param>
/// <param name="isReference">
/// Selects which no-call genotype is used: <c>RefLikeNoCall</c> when true,
/// <c>AltLikeNoCall</c> when false.
/// </param>
private void CallCandidate(CalledAllele mnv, bool isReference)
{
    mnv.NoiseLevelApplied = _bamParams.MinimumBaseCallQuality;

    // Strand-bias placeholder. Notes carried over from the original discussion:
    //  tjd: Since any input variant passed filters, we assume it is not SB'ed. Before
    //       the big Scylla refactor we copied the SB number from the original variant
    //       at this index, but that is not really consistent with the new call.
    //  gb:  We can assume the component variants were not strand biased according to
    //       the thresholds of the original Pisces run, but that does not mean they
    //       are necessarily -100.
    //  tjd: Correct. Alternatives:
    //       A) leave the SB tag off the final call, or set it to NaN
    //          (could be a downstream parsing problem);
    //       B) derive a number from the SB of the input variants (e.g. a worst-case
    //          maximum over the component variants in the neighborhood);
    //       C) implement a fully stranded phasing model.
    //       If work is put into this, C is preferred; it will probably be a feature
    //       request before long anyway.
    // For the moment this value does not affect any pass/fail variant-call decision.
    mnv.StrandBiasResults.GATKBiasScore = -100;

    var noCallGenotype = isReference
        ? Pisces.Domain.Types.Genotype.RefLikeNoCall
        : Pisces.Domain.Types.Genotype.AltLikeNoCall;

    // Low variant Q-score: no-call, genotype quality zeroed.
    if (mnv.VariantQscore < _callerParams.MinimumVariantQScore)
    {
        mnv.Genotype = noCallGenotype;
        mnv.GenotypeQscore = 0;
    }

    // Low frequency: no-call, both quality scores zeroed.
    if (mnv.Frequency < _callerParams.MinimumFrequency)
    {
        mnv.Genotype = noCallGenotype;
        mnv.VariantQscore = 0;
        mnv.GenotypeQscore = 0;
    }

    // As an alternative to setting them to no-calls, we could just omit them from the gvcf.
}
/// <summary>
/// Steps forward with the reader, collecting every variant located at the
/// position of <paramref name="CurrentVariant"/>.
/// </summary>
/// <param name="Reader">Source of the next batches of variants.</param>
/// <param name="CurrentVariant">Variant whose locus defines "co-located".</param>
/// <param name="alleleOrdering">
/// Comparer used to order a batch against <paramref name="CurrentVariant"/>:
/// 0 means co-located, -1 means the current variant comes first.
/// </param>
/// <param name="BackLogExists">
/// On entry, whether <paramref name="TheBackLog"/> holds a batch to consume before
/// reading. On exit, whether a batch lying beyond the current position was stored.
/// </param>
/// <param name="TheBackLog">
/// The pending batch. Set to the first batch found after the current position, or
/// to null when no backlog remains.
/// </param>
/// <returns>All variants found at the current position (possibly empty).</returns>
/// <exception cref="InvalidDataException">A batch orders before the current variant.</exception>
private static List<CalledAllele> AssembleColocatedList(
    AlleleReader Reader,
    CalledAllele CurrentVariant,
    AlleleCompareByLoci alleleOrdering,
    ref bool BackLogExists,
    ref List<CalledAllele> TheBackLog)
{
    var colocated = new List<CalledAllele>();

    while (true)
    {
        List<CalledAllele> batch;
        if (BackLogExists)
        {
            // Consume the batch left over from the previous call first.
            batch = TheBackLog;
            BackLogExists = false;
        }
        else if (!Reader.GetNextVariants(out batch))
        {
            // Reader is exhausted.
            break;
        }

        int order = alleleOrdering.OrderAlleles(CurrentVariant, batch.First());

        if (order == 0)
        {
            // The batch we just got is at our current position.
            colocated.AddRange(batch);
            continue;
        }

        if (order == -1)
        {
            // The batch is after our current position: park it in the backlog and stop.
            TheBackLog = batch;
            BackLogExists = true;
            break;
        }

        throw new InvalidDataException("Vcf needs to be ordered.");
    }

    if (!BackLogExists)
    {
        TheBackLog = null;
    }

    return colocated;
}
/// <summary>
/// Tests whether <paramref name="callableAllele"/> lies entirely inside the span
/// covered by <paramref name="failedMnv"/> on the same chromosome.
/// </summary>
/// <param name="callableAllele">Allele being checked; must be an MNV, SNV or reference call to qualify.</param>
/// <param name="failedMnv">Allele whose coordinate and alternate length define the span.</param>
/// <returns>
/// <c>true</c> only when the callable allele starts at or after the failed one, is on
/// the same chromosome, has an alternate no longer than the failed one's, ends no
/// later than the failed one's coordinate plus alternate length, and is of type
/// Mnv, Snv or Reference.
/// </returns>
private static bool IsPotentialOverlap(CalledAllele callableAllele, CalledAllele failedMnv)
{
    if (callableAllele.Coordinate < failedMnv.Coordinate)
    {
        return false;
    }

    if (callableAllele.Chromosome != failedMnv.Chromosome)
    {
        return false;
    }

    var failedEnd = failedMnv.Coordinate + failedMnv.Alternate.Length;
    if (callableAllele.Coordinate > failedEnd)
    {
        return false;
    }

    if (callableAllele.Alternate.Length > failedMnv.Alternate.Length)
    {
        return false;
    }

    if (callableAllele.Coordinate + callableAllele.Alternate.Length > failedEnd)
    {
        return false;
    }

    switch (callableAllele.Type)
    {
        case AlleleCategory.Mnv:
        case AlleleCategory.Snv:
        case AlleleCategory.Reference:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Orders two called alleles by locus by delegating to
/// <c>Extensions.OrderVariants</c> on VCF variants that carry only each allele's
/// coordinate and chromosome.
/// </summary>
/// <param name="a">First allele.</param>
/// <param name="b">Second allele.</param>
/// <param name="mFirst">Passed through unchanged to <c>Extensions.OrderVariants</c>.</param>
/// <returns>The value returned by <c>Extensions.OrderVariants</c>.</returns>
public static int OrderVariants(CalledAllele a, CalledAllele b, bool mFirst)
{
    var first = new VcfVariant();
    first.ReferencePosition = a.Coordinate;
    first.ReferenceName = a.Chromosome;

    var second = new VcfVariant();
    second.ReferencePosition = b.Coordinate;
    second.ReferenceName = b.Chromosome;

    return Extensions.OrderVariants(first, second, mFirst);
}
/// <summary>
/// Returns the alternate-allele frequency of a variant.
/// </summary>
/// <param name="variant">Variant to inspect.</param>
/// <returns>
/// <c>variant.Frequency</c> when the variant has an alt allele. Otherwise the
/// alternate support (from <c>GetAlternateAlleleSupport</c>) divided by
/// <c>TotalCoverage</c>, or 0 when <c>TotalCoverage</c> is 0.
/// </returns>
public static double GetAlternateAlleleFrequency(CalledAllele variant)
{
    if (variant.HasAnAltAllele)
    {
        return variant.Frequency;
    }

    // With no coverage the ratio is undefined; report 0 instead of letting the
    // floating-point division produce NaN or Infinity.
    if (variant.TotalCoverage == 0)
    {
        return 0;
    }

    return (double)GetAlternateAlleleSupport(variant) / variant.TotalCoverage;
}
/// <summary>
/// Returns the number of reads supporting the alternate allele.
/// </summary>
/// <param name="variant">Variant to inspect.</param>
/// <returns>
/// <c>AlleleSupport</c> when the variant has an alt allele; otherwise
/// <c>TotalCoverage - AlleleSupport</c>.
/// </returns>
public static int GetAlternateAlleleSupport(CalledAllele variant)
{
    return variant.HasAnAltAllele
        ? variant.AlleleSupport
        : variant.TotalCoverage - variant.AlleleSupport;
}
/// <summary>
/// Classifies a called allele into a <c>MutationCategory</c>.
/// </summary>
/// <param name="variant">Allele to classify.</param>
/// <returns>
/// <c>MutationCategory.Reference</c> for reference-type alleles; otherwise the
/// category computed from the reference and alternate allele strings.
/// </returns>
public static MutationCategory GetMutationCategory(CalledAllele variant)
{
    bool isReferenceCall = variant.Type == Pisces.Domain.Types.AlleleCategory.Reference;

    return isReferenceCall
        ? MutationCategory.Reference
        : GetMutationCategory(variant.ReferenceAllele, variant.AlternateAllele);
}
/// <summary>
/// Test helper: runs a <c>CoverageCalculator</c> constructed with <c>true</c> against a
/// mock state manager built from <paramref name="stagedCounts"/>, then asserts that the
/// variant's total coverage and unanchored coverage weight fall within the given
/// inclusive bounds.
/// </summary>
private void ComputeCoverageTestInternalWeighted(
    CalledAllele variant,
    List<AlleleCount> stagedCounts,
    int expectedCoverageLowerBound,
    int expectedCoverageUpperBound,
    float expectedWeightLowerBound,
    float expectedWeightUpperBound)
{
    var stateManager = CreateMockStateManager(stagedCounts, 0);
    var calculator = new CoverageCalculator(true);

    calculator.Compute(variant, stateManager);

    // Coverage must lie in [lower, upper].
    Assert.True(variant.TotalCoverage >= expectedCoverageLowerBound);
    Assert.True(variant.TotalCoverage <= expectedCoverageUpperBound);

    // Weight must lie in [lower, upper].
    Assert.True(variant.UnanchoredCoverageWeight >= expectedWeightLowerBound);
    Assert.True(variant.UnanchoredCoverageWeight <= expectedWeightUpperBound);
}
/// <summary>
/// Reports whether <paramref name="originalCall"/> is the same allele
/// (per <c>IsSameAllele</c>) as any entry of <paramref name="usedAlleles"/>.
/// </summary>
/// <param name="usedAlleles">Alleles already used.</param>
/// <param name="originalCall">Allele to look for.</param>
/// <returns><c>true</c> if a matching allele is found; otherwise <c>false</c>.</returns>
private static bool CheckIfUsed(List<CalledAllele> usedAlleles, CalledAllele originalCall)
{
    return usedAlleles.Exists(used => originalCall.IsSameAllele(used));
}
/// <summary>
/// Builds a <c>CalledAllele</c> fixture: coverage 100, reference support 90,
/// allele support 10, genotype Q 72, noise level 20, variant Q 666.
/// </summary>
/// <returns>The populated allele.</returns>
private static CalledAllele SetUpCalledAllele()
{
    return new CalledAllele
    {
        TotalCoverage = 100,
        ReferenceSupport = 90,
        AlleleSupport = 10,
        GenotypeQscore = 72,
        NoiseLevelApplied = 20,
        VariantQscore = 666
    };
}
/// <summary>
/// Asserts that a called allele matches a baseline VCF variant in reference allele,
/// (single) alternate allele, chromosome, position and genotype.
/// </summary>
/// <param name="baseline">Expected variant; must carry exactly one variant allele.</param>
/// <param name="test">Allele under test.</param>
public static void CheckVariantsMatch(VcfVariant baseline, CalledAllele test)
{
    // Check the allele count first so a malformed baseline fails with a clear
    // assertion rather than an index error below. Expected value goes first so
    // failure messages label the two sides correctly.
    Assert.Equal(1, baseline.VariantAlleles.Length);

    Assert.Equal(baseline.ReferenceAllele, test.ReferenceAllele);
    Assert.Equal(baseline.VariantAlleles[0], test.AlternateAllele);
    Assert.Equal(baseline.ReferenceName, test.Chromosome);
    Assert.Equal(baseline.ReferencePosition, test.ReferencePosition);

    // A "." alternate means no alt allele is present.
    int numAlts = (baseline.VariantAlleles[0] == ".") ? 0 : baseline.VariantAlleles.Length;
    Assert.Equal(VcfVariantUtilities.MapGTString(baseline.Genotypes[0]["GT"], numAlts), test.Genotype);
}
/// <summary>
/// Verifies that <c>CalledAllele.SetType</c> derives the expected
/// <c>AlleleCategory</c> from a range of reference/alternate allele pairs.
/// </summary>
public void CandidateAllele_CheckType()
{
    var cases = new[]
    {
        new { Reference = "A", Alternate = ".", Expected = AlleleCategory.Reference },
        new { Reference = "A", Alternate = "A", Expected = AlleleCategory.Reference },
        new { Reference = "A", Alternate = "C", Expected = AlleleCategory.Snv },
        new { Reference = "AC", Alternate = "CG", Expected = AlleleCategory.Mnv },
        new { Reference = "AAA", Alternate = "A", Expected = AlleleCategory.Deletion },
        new { Reference = "A", Alternate = "ACG", Expected = AlleleCategory.Insertion },
        new { Reference = "AFGGGG", Alternate = "ACG", Expected = AlleleCategory.Unsupported }
    };

    foreach (var testCase in cases)
    {
        var allele = new CalledAllele()
        {
            ReferenceAllele = testCase.Reference,
            AlternateAllele = testCase.Alternate
        };

        allele.SetType();

        Assert.Equal(testCase.Expected, allele.Type);
    }
}
/// <summary>
/// Clears the allele's filters and re-applies every filter whose threshold is set.
/// </summary>
/// <remarks>
/// Applied to every allele: LowDepth and LowVariantQscore. Applied only to
/// non-reference alleles: StrandBias, IndelRepeatLength, RMxN, LowVariantFrequency,
/// and StrandBias again for stitched sources whose alternate contains "N".
/// </remarks>
/// <param name="allele">Allele whose filters are reset and recomputed.</param>
/// <param name="minCoverageFilter">Minimum total coverage; null disables the check.</param>
/// <param name="variantQscoreThreshold">Minimum variant Q-score; null disables the check.</param>
/// <param name="filterSingleStrandVariants">When true, variants not present on both strands get StrandBias.</param>
/// <param name="variantFreqFilter">Minimum frequency; null disables the check.</param>
/// <param name="lowGenotypeqFilter">Not referenced by this method.</param>
/// <param name="indelRepeatFilter">Repeat-length threshold; null or non-positive disables the check.</param>
/// <param name="rMxNFilterSettings">Settings passed to <c>RMxNCalculator.ShouldFilter</c>.</param>
/// <param name="hasStitchedSource">Whether the stitched-source "N" check applies.</param>
/// <param name="chrReference">Reference whose <c>Sequence</c> is used by the repeat and RMxN checks.</param>
private static void ApplyFilters(
    CalledAllele allele,
    int? minCoverageFilter,
    int? variantQscoreThreshold,
    bool filterSingleStrandVariants,
    float? variantFreqFilter,
    float? lowGenotypeqFilter,
    int? indelRepeatFilter,
    RMxNFilterSettings rMxNFilterSettings,
    bool hasStitchedSource,
    ChrReference chrReference)
{
    // Start from a clean slate.
    allele.Filters.Clear();

    if (minCoverageFilter.HasValue && allele.TotalCoverage < minCoverageFilter.Value)
    {
        allele.AddFilter(FilterType.LowDepth);
    }

    // Zero-depth alleles are not flagged for Q-score, because in that case the
    // Q-score calculation was never made.
    if (variantQscoreThreshold.HasValue
        && allele.VariantQscore < variantQscoreThreshold.Value
        && allele.TotalCoverage != 0)
    {
        allele.AddFilter(FilterType.LowVariantQscore);
    }

    // Everything below applies to non-reference alleles only.
    if (allele.Type == AlleleCategory.Reference)
    {
        return;
    }

    bool strandBiasOk = allele.StrandBiasResults.BiasAcceptable
        && (!filterSingleStrandVariants || allele.StrandBiasResults.VarPresentOnBothStrands);
    if (!strandBiasOk)
    {
        allele.AddFilter(FilterType.StrandBias);
    }

    if (indelRepeatFilter.HasValue && indelRepeatFilter.Value > 0)
    {
        var repeatLength = ComputeIndelRepeatLength(allele, chrReference.Sequence);
        if (repeatLength >= indelRepeatFilter.Value)
        {
            allele.AddFilter(FilterType.IndelRepeatLength);
        }
    }

    if (RMxNCalculator.ShouldFilter(allele, rMxNFilterSettings, chrReference.Sequence))
    {
        allele.AddFilter(FilterType.RMxN);
    }

    if (variantFreqFilter.HasValue && allele.Frequency < variantFreqFilter.Value)
    {
        allele.AddFilter(FilterType.LowVariantFrequency);
    }

    // Can only happen for insertions and MNVs.
    if (hasStitchedSource && allele.Alternate.Contains("N"))
    {
        allele.AddFilter(FilterType.StrandBias);
    }
}
// jg todo - set numnocalls - appears to only be applicable to SNVs
/// <summary>
/// Sets <c>FractionNoCalls</c> to <c>NumNoCalls / (TotalCoverage + NumNoCalls)</c>,
/// or to 0 when that denominator is 0.
/// </summary>
/// <param name="allele">Allele to update in place.</param>
private static void SetFractionNoCall(CalledAllele allele)
{
    var denominator = (float)(allele.TotalCoverage + allele.NumNoCalls);

    allele.FractionNoCalls = denominator == 0
        ? 0f
        : allele.NumNoCalls / denominator;
}
/// <summary>
/// Checks that, for two alt alleles at the same locus with similar support, the
/// smallest genotype posterior returned by
/// <c>AdaptiveGenotyperCalculator.GetMultiAllelicQScores</c> sits at index 4.
/// </summary>
public void GetMultiAllelicQScores()
{
    CalledAllele firstAlt = TestHelper.CreateDummyAllele("chr1", 1000, "A", "C", 30, 12);
    CalledAllele secondAlt = TestHelper.CreateDummyAllele("chr1", 1000, "A", "T", 30, 11);
    var models = new List<double[]> { Means, Means };

    MixtureModelResult result = AdaptiveGenotyperCalculator.GetMultiAllelicQScores(firstAlt, secondAlt, models);

    // The posterior at index 4 should always be the minimum because that reflects the 1/2 call.
    var posteriors = result.GenotypePosteriors.ToList();
    var smallestPosterior = result.GenotypePosteriors.Min();
    Assert.Equal(4, posteriors.IndexOf(smallestPosterior));
}
/// <summary>
/// Accumulates coverage statistics for a single-position allele from the allele
/// counts observed at its reference position.
/// </summary>
/// <remarks>
/// For every direction and every coverage-contributing allele type this adds to
/// <c>EstimatedCoverageByDirection</c> and <c>SumOfBaseQuality</c>, and to
/// <c>ReferenceSupport</c> when the allele type equals the type of the reference
/// allele. Per direction it then adds to <c>TotalCoverage</c>,
/// <c>ConfidentCoverageStart</c>, <c>ConfidentCoverageEnd</c> and <c>NumNoCalls</c>.
/// Finally, reference counts already consumed by gapped MNVs are subtracted
/// (floored at 0) from <c>ReferenceSupport</c> for SNVs, or from
/// <c>AlleleSupport</c> for reference calls.
/// </remarks>
/// <param name="allele">Allele to update in place.</param>
/// <param name="alleleCountSource">Provider of per-position allele counts and base-quality sums.</param>
protected virtual void CalculateSinglePoint(CalledAllele allele, IAlleleSource alleleCountSource)
{
    //TODO: Is there a reason why we don't reallocate the stitched coverage here for point mutations? (as we do with spanning ones)

    // The reference allele type does not change while counting, so look it up once.
    var referenceAlleleType = AlleleHelper.GetAlleleType(allele.ReferenceAllele);

    // Sum up all observations at that point.
    for (var direction = 0; direction < Constants.NumDirectionTypes; direction++)
    {
        var directionType = (DirectionType)direction;

        foreach (var alleleType in Constants.CoverageContributingAlleles)
        {
            allele.EstimatedCoverageByDirection[direction] +=
                alleleCountSource.GetAlleleCount(allele.ReferencePosition, alleleType, directionType);
            allele.SumOfBaseQuality +=
                alleleCountSource.GetSumOfAlleleBaseQualities(allele.ReferencePosition, alleleType, directionType);

            if (alleleType == referenceAlleleType)
            {
                allele.ReferenceSupport +=
                    alleleCountSource.GetAlleleCount(allele.ReferencePosition, alleleType, directionType);
            }
        }

        allele.TotalCoverage += allele.EstimatedCoverageByDirection[direction];

        // For single point variants, for now, we're calling everything confident coverage.
        allele.ConfidentCoverageStart += allele.EstimatedCoverageByDirection[direction];
        allele.ConfidentCoverageEnd += allele.EstimatedCoverageByDirection[direction];

        allele.NumNoCalls += alleleCountSource.GetAlleleCount(allele.ReferencePosition, AlleleType.N, directionType);
    }

    // Adjust for reference counts already taken up by gapped mnvs.
    // Note: it's possible that the ref count taken up by a gapped mnv is greater than the depth at that ref position.
    // This is possible when collapsing is true, and some gapped ref positions have low quality (or are N).
    // In these cases, they get collapsed to the mnv and count towards support, but those specific alleles were
    // never added to the region's allele counts because they are low quality.
    // Collapsing is the correct thing to do, so this is ok. We should just make sure to cap at 0.
    var gappedRefCounts = alleleCountSource.GetGappedMnvRefCount(allele.ReferencePosition);

    if (allele.Type == AlleleCategory.Snv)
    {
        allele.ReferenceSupport = Math.Max(0, allele.ReferenceSupport - gappedRefCounts);
    }
    else if (allele.Type == AlleleCategory.Reference)
    {
        allele.AlleleSupport = Math.Max(0, allele.AlleleSupport - gappedRefCounts);
    }
}
/// <summary>
/// Builds a reference call at the position of a used variant, then runs it through
/// <c>CallCandidate</c> and <c>AddFilters</c>.
/// </summary>
/// <param name="usedVariant">Variant supplying chromosome, coordinate, first reference base, reference support and total coverage.</param>
/// <param name="numRefCallsSuckedUpByOtherVariants">Reference support to subtract; the result is floored at 0.</param>
/// <returns>The new reference allele.</returns>
public CalledAllele ReCallAsRef(CalledAllele usedVariant, int numRefCallsSuckedUpByOtherVariants)
{
    // Only the first reference base is kept for the reference call.
    var firstReferenceBase = usedVariant.Reference.Substring(0, 1);
    var remainingRefSupport = Math.Max(0, usedVariant.ReferenceSupport - numRefCallsSuckedUpByOtherVariants);

    var newRef = PhasedVariantExtractor.Create(
        usedVariant.Chromosome,
        usedVariant.Coordinate,
        firstReferenceBase,
        ".",
        remainingRefSupport,
        usedVariant.TotalCoverage,
        Pisces.Domain.Types.AlleleCategory.Reference,
        _bamParams.MinimumBaseCallQuality,
        _callerParams.MaximumVariantQScore);

    CallCandidate(newRef, true);
    AddFilters(newRef, true);

    return newRef;
}
/// <summary>
/// Assigns a variant Q-score to an allele from its support and total coverage,
/// and records the noise level used.
/// </summary>
/// <param name="allele">Allele to update in place.</param>
/// <param name="maxQScore">Passed to <c>AssignPoissonQScore</c> as the maximum Q-score.</param>
/// <param name="estimatedBaseCallQuality">Stored as <c>NoiseLevelApplied</c> and passed to <c>AssignPoissonQScore</c>.</param>
public static void Compute(CalledAllele allele, int maxQScore, int estimatedBaseCallQuality)
{
    allele.NoiseLevelApplied = estimatedBaseCallQuality;

    // With no coverage there is nothing to score.
    allele.VariantQscore = allele.TotalCoverage == 0
        ? 0
        : AssignPoissonQScore(allele.AlleleSupport, allele.TotalCoverage, estimatedBaseCallQuality, maxQScore);
}
/// <summary>
/// Combines the variant Q-scores of two alleles by taking the smaller one.
/// </summary>
/// <param name="VariantA">First allele, or null.</param>
/// <param name="VariantB">Second allele, or null.</param>
/// <returns>
/// The minimum of the two Q-scores when both alleles are present; the Q-score of the
/// only non-null allele when one is null.
/// </returns>
private static int CombineQualitiesByTakingMinValue(CalledAllele VariantA, CalledAllele VariantB)
{
    if (VariantA != null && VariantB != null)
    {
        return Math.Min(VariantA.VariantQscore, VariantB.VariantQscore);
    }

    // At most one allele is present here; B is used only when it is non-null.
    var onlyVariant = VariantB ?? VariantA;
    return onlyVariant.VariantQscore;
}
/// <summary>
/// Computes amplicon-bias results for an SNV when a filter threshold is supplied.
/// </summary>
/// <param name="variant">Variant to update; only touched when its type is Snv.</param>
/// <param name="maxQScore">Passed to <c>CalculateAmpliconBias</c>.</param>
/// <param name="filterThreshold">Acceptance criteria; when null the method does nothing.</param>
public static void Compute(CalledAllele variant, int maxQScore, float? filterThreshold)
{
    if (!filterThreshold.HasValue)
    {
        return;
    }

    // Restrict use to SNPs for now. We have not done any testing on this for indels,
    // and indel coverage calc for amplicons would need special handling.
    if (variant.Type != AlleleCategory.Snv)
    {
        return;
    }

    variant.AmpliconBiasResults = CalculateAmpliconBias(
        variant.SupportByAmplicon,
        variant.CoverageByAmplicon,
        filterThreshold.Value,
        maxQScore);
}
/// <summary>
/// Calculates the Q score and genotype posteriors of a 1/2 locus using a multinomial distribution.
/// This method is called from the Pisces variant caller.
/// </summary>
/// <param name="allele1">First alt allele; its <c>TotalCoverage</c> is used as the locus coverage.</param>
/// <param name="allele2">Second alt allele.</param>
/// <param name="models">IList of models for allele1 and allele2. Each model is a 3 element double array.</param>
/// <returns>The <c>MixtureModelResult</c> produced by <c>MixtureModel.GetMultinomialQScores</c>.</returns>
public static MixtureModelResult GetMultiAllelicQScores(CalledAllele allele1, CalledAllele allele2, IList<double[]> models)
{
    int totalCoverage = allele1.TotalCoverage;
    int secondAltDepth = allele2.AlleleSupport;
    int firstAltDepth = allele1.AlleleSupport;

    // "Reference" here is not really reference -- it is just everything else that
    // was not the top two alleles. Floored at 0.
    int everythingElseDepth = Math.Max(totalCoverage - firstAltDepth - secondAltDepth, 0);

    var alleleDepths = new[] { everythingElseDepth, firstAltDepth, secondAltDepth };

    return MixtureModel.GetMultinomialQScores(alleleDepths, totalCoverage, models);
}
/// <summary>
/// Combines two variants made by PhasedVariantExtractor into one.
/// Simplistic in that it only handles the relevant fields, not every possible field.
/// </summary>
/// <remarks>
/// LIMITATION: assumes both inputs are the same variant in terms of chromosome,
/// position and ref/alt alleles (see <c>CalledAllele.IsSameAllele</c>); locus, alleles,
/// type and noise level are taken from <paramref name="allele1"/>. Allele support is
/// summed, while no-call count, total coverage and reference support are averaged
/// with integer division.
/// </remarks>
/// <param name="allele1">First variant; supplies the identity fields.</param>
/// <param name="allele2">Second variant.</param>
/// <param name="maxQscore">Passed through to <c>Create</c>.</param>
/// <returns>The combined allele.</returns>
public static CalledAllele CombinePhasedVariants(CalledAllele allele1, CalledAllele allele2, int maxQscore)
{
    var combinedSupport = allele1.AlleleSupport + allele2.AlleleSupport;
    var meanNoCalls = (allele1.NumNoCalls + allele2.NumNoCalls) / 2;
    var meanCoverage = (allele1.TotalCoverage + allele2.TotalCoverage) / 2;
    var meanReferenceSupport = (allele1.ReferenceSupport + allele2.ReferenceSupport) / 2;

    return Create(
        allele1.Chromosome,
        allele1.ReferencePosition,
        allele1.ReferenceAllele,
        allele1.AlternateAllele,
        combinedSupport,
        meanNoCalls,
        meanCoverage,
        meanReferenceSupport,
        allele1.Type,
        allele1.NoiseLevelApplied,
        maxQscore);
}
/// <summary>
/// Test helper: sets the variant's coverage and allele support from a depth and
/// frequency, then asserts the Q-score returned by
/// <c>AdaptiveGenotyperCalculator.GetGenotypeAndQScore</c>.
/// </summary>
/// <param name="variant">Variant to mutate and score.</param>
/// <param name="frequency">Frequency used to derive the support; homozygous-ref variants use <c>1 - frequency</c>.</param>
/// <param name="depth">Total depth; truncated to an integer.</param>
/// <param name="expectedValue">Expected Q-score.</param>
private void TestCalculation(CalledAllele variant, double frequency, double depth, int expectedValue)
{
    variant.TotalCoverage = (int)depth;

    // For a homozygous-ref call the support is the complement of the frequency.
    variant.AlleleSupport = variant.Genotype == Genotype.HomozygousRef
        ? (int)(depth * (1.0 - frequency))
        : (int)(depth * frequency);

    MixtureModelResult result = AdaptiveGenotyperCalculator.GetGenotypeAndQScore(variant, Means, Priors);

    Assert.Equal(expectedValue, result.QScore);
}
/// <summary>
/// Test helper: sets the variant's coverage and allele support from a depth and
/// frequency, then asserts the value returned by
/// <c>DiploidGenotypeQualityCalculator.Compute(variant, 0, int.MaxValue)</c>.
/// </summary>
/// <param name="variant">Variant to mutate and score.</param>
/// <param name="frequency">Frequency used to derive the support; homozygous-ref variants use <c>1 - frequency</c>.</param>
/// <param name="depth">Total depth; truncated to an integer.</param>
/// <param name="expectedValue">Expected genotype quality.</param>
private static void TestCalculation(CalledAllele variant, double frequency, double depth, int expectedValue)
{
    variant.TotalCoverage = (int)depth;

    bool isHomozygousRef = variant.Genotype == Genotype.HomozygousRef;
    if (isHomozygousRef)
    {
        // Support for a homozygous-ref call is the complement of the frequency.
        variant.AlleleSupport = (int)(depth * (1.0 - frequency));
    }
    else
    {
        variant.AlleleSupport = (int)(depth * frequency);
    }

    int actualQuality = DiploidGenotypeQualityCalculator.Compute(variant, 0, int.MaxValue);

    Assert.Equal(expectedValue, actualQuality);
}
/// <summary>
/// Writes one tab-separated line for a variant: chromosome, reference position,
/// reference allele, alternate allele, followed by its strand-bias statistics.
/// Calls whose reference and alternate alleles are equal are skipped.
/// </summary>
/// <param name="writer">Destination for the line.</param>
/// <param name="variant">Variant to report.</param>
public static void PrintBiasStats(StreamWriter writer, CalledAllele variant)
{
    bool isReferenceCall = variant.ReferenceAllele == variant.AlternateAllele;
    if (!isReferenceCall)
    {
        var biasColumns = StatsToString(variant.StrandBiasResults);
        var locusColumns = string.Format(
            "{0}\t{1}\t{2}\t{3}\t",
            variant.Chromosome,
            variant.ReferencePosition,
            variant.ReferenceAllele,
            variant.AlternateAllele);

        writer.WriteLine(locusColumns + biasColumns);
    }
}
/// <summary>
/// Computes the repeat length for an insertion, deletion or SNV by handing up to
/// <c>FlankingBaseCount</c> reference bases on either side of the allele position to
/// <c>CheckVariantRepeatCount</c>.
/// Useful for filtering some indels - e.g. we mistrust a call of AAAAAAAA versus
/// reference AAAAAAAAA, since it may be polymerase slippage during PCR in sample
/// prep rather than an actual mutation.
/// </summary>
/// <param name="allele">Allele to examine.</param>
/// <param name="referenceBases">Chromosome reference sequence; flanks are upper-cased before use.</param>
/// <returns>
/// 0 when the reference is null/empty or the allele is not an insertion, deletion or
/// SNV; otherwise the value returned by <c>CheckVariantRepeatCount</c>.
/// </returns>
private static int ComputeIndelRepeatLength(CalledAllele allele, string referenceBases)
{
    if (String.IsNullOrEmpty(referenceBases))
    {
        return 0;
    }

    switch (allele.Type)
    {
        case AlleleCategory.Insertion:
        case AlleleCategory.Deletion:
        case AlleleCategory.Snv:
            break;
        default:
            return 0;
    }

    // Logic from GetFlankingBases: convert the position to a string index, then
    // clamp both flanking windows to the bounds of the reference.
    var zeroBasedPosition = allele.ReferencePosition - 1;
    var lastReferenceIndex = referenceBases.Length - 1;

    var upstreamBegin = Math.Max(zeroBasedPosition - FlankingBaseCount, 0);
    var upstreamEnd = Math.Min(zeroBasedPosition - 1, lastReferenceIndex);
    var downstreamBegin = Math.Max(zeroBasedPosition, 0);
    var downstreamEnd = Math.Min(zeroBasedPosition + FlankingBaseCount - 1, lastReferenceIndex);

    // There is no upstream flank when the allele sits at the very start.
    var upstreamBases = upstreamEnd >= 0
        ? referenceBases.Substring(upstreamBegin, upstreamEnd - upstreamBegin + 1).ToUpper()
        : String.Empty;

    var downstreamBases = referenceBases
        .Substring(downstreamBegin, downstreamEnd - downstreamBegin + 1)
        .ToUpper();

    return CheckVariantRepeatCount(allele, upstreamBases, downstreamBases);
}
/// <summary>
/// Records a rejected phased variant. If the same allele (per <c>IsSameAllele</c>) is
/// already recorded, the existing entry is removed and the combination of the two
/// (via <c>PhasedVariantExtractor.CombinePhasedVariants</c>) is added instead.
/// </summary>
/// <param name="variant">Variant to record.</param>
public void AddRejectedPhasedVariant(CalledAllele variant)
{
    var alreadyRejected = _rejectedPhasedVariants.Find(v => v.IsSameAllele(variant));
    var toStore = variant;

    if (alreadyRejected != null)
    {
        // Merge with the existing entry and drop the old one.
        toStore = PhasedVariantExtractor.CombinePhasedVariants(alreadyRejected, variant, MaxQScore);
        _rejectedPhasedVariants.Remove(alreadyRejected);
    }

    _rejectedPhasedVariants.Add(toStore);
}
/// <summary>
/// Reports whether a called allele matches a candidate allele on chromosome,
/// reference position, reference allele, alternate allele and type, and optionally
/// on an exact expected frequency and allele support.
/// </summary>
/// <param name="BaseCalledAllele">Called allele to check.</param>
/// <param name="candidateVariant">Candidate to compare against.</param>
/// <param name="expectedSupport">When set, <c>AlleleSupport</c> must equal this value.</param>
/// <param name="expectedFreq">When set, <c>Frequency</c> must equal this value exactly.</param>
/// <returns><c>true</c> when every applicable comparison matches.</returns>
private bool MatchVariants(CalledAllele BaseCalledAllele, CandidateAllele candidateVariant, int? expectedSupport = null, float? expectedFreq = null)
{
    if (BaseCalledAllele.Chromosome != candidateVariant.Chromosome)
    {
        return false;
    }

    if (BaseCalledAllele.ReferencePosition != candidateVariant.ReferencePosition)
    {
        return false;
    }

    if (BaseCalledAllele.ReferenceAllele != candidateVariant.ReferenceAllele)
    {
        return false;
    }

    if (BaseCalledAllele.AlternateAllele != candidateVariant.AlternateAllele)
    {
        return false;
    }

    if (BaseCalledAllele.Type != candidateVariant.Type)
    {
        return false;
    }

    // Optional checks only apply when an expectation was supplied.
    if (expectedFreq.HasValue && BaseCalledAllele.Frequency != expectedFreq.Value)
    {
        return false;
    }

    if (expectedSupport.HasValue && BaseCalledAllele.AlleleSupport != expectedSupport.Value)
    {
        return false;
    }

    return true;
}