/// <summary>
/// Runs the amplicon bias calculation on the supplied counts, then again on the same counts
/// with the amplicon order reversed, and asserts that the two runs agree.
/// </summary>
/// <param name="supportForAmplicons">Variant support counts, per amplicon.</param>
/// <param name="coverageForAmplicons">Coverage counts, per amplicon.</param>
/// <param name="expectNull">When true, asserts that neither run produced a bias result.</param>
/// <returns>The bias results computed for the counts in their original order.</returns>
private BiasResultsAcrossAmplicons ExecuteTest(AmpliconCounts supportForAmplicons, AmpliconCounts coverageForAmplicons, bool expectNull = false)
{
    var forwardVariant = new CalledAllele(AlleleCategory.Snv)
    {
        SupportByAmplicon = supportForAmplicons,
        CoverageByAmplicon = coverageForAmplicons
    };
    AmpliconBiasCalculator.Compute(forwardVariant, 100, 0.01F);

    var reversedVariant = new CalledAllele(AlleleCategory.Snv)
    {
        SupportByAmplicon = ReverseAmpliconData(supportForAmplicons),
        CoverageByAmplicon = ReverseAmpliconData(coverageForAmplicons)
    };
    AmpliconBiasCalculator.Compute(reversedVariant, 100, 0.01F);

    // Sanity check: the order in which the amplicons are listed must never change the outcome.
    if (!expectNull)
    {
        Assert.Equal(forwardVariant.AmpliconBiasResults.BiasDetected, reversedVariant.AmpliconBiasResults.BiasDetected);
    }
    else
    {
        Assert.Null(forwardVariant.AmpliconBiasResults);
        Assert.Null(reversedVariant.AmpliconBiasResults);
    }

    return forwardVariant.AmpliconBiasResults;
}
/// <summary>
/// Collects the amplicon names recorded at a position together with the count stored for each.
/// </summary>
/// <param name="position">Position to look up; must lie inside this region.</param>
/// <returns>
/// Counts containing only the populated amplicon slots for the position. Both arrays are
/// empty when nothing has been recorded there.
/// </returns>
/// <exception cref="ArgumentException">Thrown when the position is not in this region.</exception>
public AmpliconCounts GetCountsByAmpliconForPosition(int position)
{
    if (!IsPositionInRegion(position))
    {
        throw new ArgumentException(string.Format("Position {0} is not in region '{1}'.", position, Name));
    }

    var offsetInBlock = position - StartPosition;
    var collectedNames = new List<string>();
    var collectedCounts = new List<int>();

    // Nothing is allocated for a position until the first amplicon is seen there.
    if (_ampliconNamesPerPos[offsetInBlock] != null)
    {
        for (var slot = 0; slot < Constants.MaxNumOverlappingAmplicons; slot++)
        {
            var nameInSlot = _ampliconNamesPerPos[offsetInBlock][slot];
            if (nameInSlot == null)
            {
                continue;
            }

            collectedNames.Add(nameInSlot);
            collectedCounts.Add(_ampliconCountsPerPos[offsetInBlock][slot]);
        }
    }

    return new AmpliconCounts()
    {
        CountsForAmplicon = collectedCounts.ToArray(),
        AmpliconNames = collectedNames.ToArray()
    };
}
/// <summary>
/// Holds amplicon 1 at 5% variant frequency over a depth of 1000 while amplicon 2, which never
/// shows the variant, grows in depth from 0 to 999. Bias is expected only once amplicon 2 has
/// a depth of at least 100.
/// </summary>
public void HappyPath_VaryingDepthWithBias()
{
    const double amp1Freq = 0.05;
    const double amp2Freq = 0.0;
    const double amp1Depth = 1000;

    for (var amp2Depth = 0; amp2Depth < 1000; amp2Depth++)
    {
        var support = new AmpliconCounts()
        {
            AmpliconNames = twoAmpliconNames,
            CountsForAmplicon = new int[] { (int)(amp1Freq * amp1Depth), (int)(amp2Freq * amp2Depth) }
        };
        var coverage = new AmpliconCounts()
        {
            AmpliconNames = twoAmpliconNames,
            CountsForAmplicon = new int[] { (int)amp1Depth, amp2Depth }
        };

        var results = ExecuteTest(support, coverage);

        if (amp2Depth < 100)
        {
            // Not enough coverage on the second amplicon to expect to see the variant on both.
            Assert.False(results.BiasDetected);
        }
        else
        {
            // Both amplicons are covered, but the support only ever shows up on one of them.
            Assert.True(results.BiasDetected);
        }
    }
}
/// <summary>
/// Exercises the bias calculation when the support and coverage amplicon names do not line up:
/// support missing one amplicon, support completely empty, and support naming amplicons that
/// the coverage does not contain.
/// </summary>
public void TestAmpBiasWhenAmpNamesDontMatchUp()
{
    // Case 1: one amplicon has no support (not even an entry in the support arrays).
    var support = new AmpliconCounts() { AmpliconNames = new[] { "B" }, CountsForAmplicon = new[] { 150 } };
    var coverage = new AmpliconCounts() { AmpliconNames = new[] { "A", "B" }, CountsForAmplicon = new[] { 100, 300 } };
    Assert.True(ExecuteTest(support, coverage).BiasDetected);

    // Case 2: the support arrays are completely empty (hard to imagine, but be defensive).
    // ExecuteTest itself asserts that no result is produced.
    support = new AmpliconCounts() { AmpliconNames = new string[] { }, CountsForAmplicon = new int[] { } };
    coverage = new AmpliconCounts() { AmpliconNames = new[] { "A", "B" }, CountsForAmplicon = new[] { 100, 150 } };
    ExecuteTest(support, coverage, expectNull: true);

    // Case 3: the support names amplicons the coverage knows nothing about
    // (hard to imagine, but be defensive; indels and ForcedReport can be odd).
    support = new AmpliconCounts() { AmpliconNames = new[] { "C", "D" }, CountsForAmplicon = new[] { 100, 150 } };
    coverage = new AmpliconCounts() { AmpliconNames = new[] { "A", "B" }, CountsForAmplicon = new[] { 100, 150 } };
    Assert.False(ExecuteTest(support, coverage).BiasDetected);
}
/// <summary>
/// Checks that empty amplicon counts are sized to the maximum number of overlapping amplicons
/// and report a count of zero for any name asked about.
/// </summary>
public void GetEmptySummaryForAmpliconTest()
{
    var emptyCounts = AmpliconCounts.GetEmptyAmpliconCounts();

    Assert.Equal(Constants.MaxNumOverlappingAmplicons, emptyCounts.AmpliconNames.Length);
    Assert.Equal(Constants.MaxNumOverlappingAmplicons, emptyCounts.CountsForAmplicon.Length);

    foreach (var probeName in new[] { "amp1", "amp2", "amp3", "", "foo" })
    {
        Assert.Equal(0, emptyCounts.GetCountsForAmplicon(probeName));
    }
}
/// <summary>
/// Checks the index and next-open-slot values reported by the static and instance forms of
/// GetAmpliconNameIndex, for a completely filled name array and a partially filled one.
/// </summary>
public void GetAmpliconNameIndexTest()
{
    string[] probeNames = { "amp1", "amp2", "amp3", "cat" };

    // A full array: every known name is found at its own position and no slot is left open.
    string[] threeAmpliconNames = { "amp1", "amp2", "amp3" };
    for (var i = 0; i < threeAmpliconNames.Length; i++)
    {
        Assert.Equal(i, AmpliconCounts.GetAmpliconNameIndex(threeAmpliconNames[i], threeAmpliconNames).IndexForAmplicon);
    }
    Assert.Equal(-1, AmpliconCounts.GetAmpliconNameIndex("cat", threeAmpliconNames).IndexForAmplicon);
    foreach (var probe in probeNames)
    {
        Assert.Equal(-1, AmpliconCounts.GetAmpliconNameIndex(probe, threeAmpliconNames).NextOpenSlot);
    }

    // A half-filled array: none of the probes are present, and slot 2 is the first free one.
    string[] fourAmpliconNames = { "rover", "fido", null, null };
    foreach (var probe in probeNames)
    {
        Assert.Equal(-1, AmpliconCounts.GetAmpliconNameIndex(probe, fourAmpliconNames).IndexForAmplicon);
    }
    foreach (var probe in probeNames)
    {
        Assert.Equal(2, AmpliconCounts.GetAmpliconNameIndex(probe, fourAmpliconNames).NextOpenSlot);
    }

    // The instance overload must agree with the static one for the full array...
    var fullCounts = new AmpliconCounts() { AmpliconNames = threeAmpliconNames };
    for (var i = 0; i < threeAmpliconNames.Length; i++)
    {
        Assert.Equal(i, fullCounts.GetAmpliconNameIndex(threeAmpliconNames[i]).IndexForAmplicon);
    }
    Assert.Equal(-1, fullCounts.GetAmpliconNameIndex("cat").IndexForAmplicon);
    Assert.Equal(-1, fullCounts.GetAmpliconNameIndex("amp1").NextOpenSlot);

    // ...and for the half-filled one.
    var partialCounts = new AmpliconCounts() { AmpliconNames = fourAmpliconNames };
    foreach (var probe in probeNames)
    {
        Assert.Equal(-1, partialCounts.GetAmpliconNameIndex(probe).IndexForAmplicon);
    }
    Assert.Equal(2, partialCounts.GetAmpliconNameIndex("amp1").NextOpenSlot);
}
/// <summary>
/// Checks that GetCountsForAmplicon returns the count stored for each named amplicon, and zero
/// for the empty name and for a name that is not in the list.
/// </summary>
public void GetCoverageForAmpliconTest()
{
    var counts = new AmpliconCounts()
    {
        AmpliconNames = new[] { "amp1", "amp2", "amp3", "", "" },
        CountsForAmplicon = new[] { 10, 0, 3, 0, 0 }
    };

    Assert.Equal(10, counts.GetCountsForAmplicon("amp1"));
    Assert.Equal(0, counts.GetCountsForAmplicon("amp2"));
    Assert.Equal(3, counts.GetCountsForAmplicon("amp3"));

    foreach (var unnamedOrUnknown in new[] { "", "foo" })
    {
        Assert.Equal(0, counts.GetCountsForAmplicon(unnamedOrUnknown));
    }
}
/// <summary>
/// When amplicon tracking is on and the read carries an amplicon name, gives the variant a
/// fresh per-amplicon support record holding that name with a count of one.
/// </summary>
/// <param name="trackAmpliconCounts">Whether amplicon counts are being tracked; nothing happens when false.</param>
/// <param name="alignment">The read the amplicon name is taken from.</param>
/// <param name="variant">The candidate whose amplicon support is set.</param>
private static void SetAmpliconName(bool trackAmpliconCounts, Read alignment, CandidateAllele variant)
{
    if (!trackAmpliconCounts)
    {
        return;
    }

    var nameFromRead = alignment.GetAmpliconNameIfExists();
    if (nameFromRead == null)
    {
        // No amplicon name on this read; leave the variant's support untouched.
        return;
    }

    var singleReadSupport = AmpliconCounts.GetEmptyAmpliconCounts();
    singleReadSupport.AmpliconNames[0] = nameFromRead;
    singleReadSupport.CountsForAmplicon[0] = 1;
    variant.SupportByAmplicon = singleReadSupport;
}
/// <summary>
/// Builds support and coverage for the two standard test amplicons from a frequency and depth
/// for each, runs the bias calculation, and asserts the expected bias outcome.
/// </summary>
/// <param name="ampAFreq">Variant frequency on the first amplicon.</param>
/// <param name="ampADepth">Coverage depth on the first amplicon.</param>
/// <param name="ampBFreq">Variant frequency on the second amplicon.</param>
/// <param name="ampBDepth">Coverage depth on the second amplicon.</param>
/// <param name="isBiased">Whether bias is expected to be detected.</param>
private void ExecuteTwoAmpTest(float ampAFreq, int ampADepth, float ampBFreq, int ampBDepth, bool isBiased)
{
    // Support is frequency * depth, truncated to a whole number of reads.
    var supportCounts = new int[] { (int)(ampAFreq * ampADepth), (int)(ampBFreq * ampBDepth) };
    var coverageCounts = new int[] { ampADepth, ampBDepth };

    var results = ExecuteTest(
        new AmpliconCounts() { AmpliconNames = twoAmpliconNames, CountsForAmplicon = supportCounts },
        new AmpliconCounts() { AmpliconNames = twoAmpliconNames, CountsForAmplicon = coverageCounts });

    Assert.Equal(isBiased, results.BiasDetected);
}
/// <summary>
/// Checks that Copy produces counts that do not compare equal to the source, yet report the
/// same count for every amplicon name.
/// </summary>
public void CopyForAmpliconTest()
{
    var source = new AmpliconCounts()
    {
        AmpliconNames = new[] { "amp1", "amp2", "amp3", "", "" },
        CountsForAmplicon = new[] { 10, 0, 3, 0, 0 }
    };

    var duplicate = source.Copy();

    Assert.NotEqual(source, duplicate);

    Assert.Equal(10, duplicate.GetCountsForAmplicon("amp1"));
    Assert.Equal(0, duplicate.GetCountsForAmplicon("amp2"));
    Assert.Equal(3, duplicate.GetCountsForAmplicon("amp3"));
    foreach (var unnamedOrUnknown in new[] { "", "foo" })
    {
        Assert.Equal(0, duplicate.GetCountsForAmplicon(unnamedOrUnknown));
    }
}
/// <summary>
/// Returns a copy of the given counts with the amplicon names and their counts in reverse
/// order. The input arrays are not modified.
/// </summary>
/// <param name="inputCounts">Counts to reverse; both arrays must be non-null.</param>
/// <returns>New counts whose names and counts arrays are reversed copies of the input's.</returns>
private AmpliconCounts ReverseAmpliconData(AmpliconCounts inputCounts)
{
    // Clone each array at its own length. Sizing the counts copy by the length of the names
    // array would throw, or silently drop counts, if the two arrays ever differ in length.
    var reversedNames = (string[])inputCounts.AmpliconNames.Clone();
    var reversedCounts = (int[])inputCounts.CountsForAmplicon.Clone();

    Array.Reverse(reversedNames);
    Array.Reverse(reversedCounts);

    return new AmpliconCounts()
    {
        AmpliconNames = reversedNames,
        CountsForAmplicon = reversedCounts
    };
}
/// <summary>
/// Add the counts to the amplicon tracker. If this is not an amplicon, then "ampliconName"
/// will be null, and this step will safely be skipped. Positions outside this region are
/// skipped as well.
/// </summary>
/// <param name="position">Position at which the amplicon was observed.</param>
/// <param name="ampliconName">Name of the observed amplicon, or null when there is none.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the amplicon is new to this position and no open slot remains, i.e. more than
/// <c>Constants.MaxNumOverlappingAmplicons</c> distinct amplicons overlap the position.
/// </exception>
public void AddAmpliconCount(int position, string ampliconName)
{
    if (ampliconName == null || !IsPositionInRegion(position))
    {
        return;
    }

    var blockPositionIndex = position - StartPosition;
    var namesArrayAtPos = _ampliconNamesPerPos[blockPositionIndex];

    // First amplicon seen at this position: allocate the slots and claim the first one.
    if ((namesArrayAtPos == null) || (namesArrayAtPos.Length == 0))
    {
        _ampliconNamesPerPos[blockPositionIndex] = new string[Constants.MaxNumOverlappingAmplicons];
        _ampliconNamesPerPos[blockPositionIndex][0] = ampliconName;
        _ampliconCountsPerPos[blockPositionIndex] = new int[Constants.MaxNumOverlappingAmplicons];
        _ampliconCountsPerPos[blockPositionIndex][0] = 1;
        return;
    }

    var indexData = AmpliconCounts.GetAmpliconNameIndex(ampliconName, namesArrayAtPos);
    var ampliconIndex = indexData.IndexForAmplicon;
    var emptySpotIndex = indexData.NextOpenSlot;

    if (ampliconIndex == -1)
    {
        // The amplicon has not been seen at this position yet, so it needs a free slot.
        // Fail with a clear message instead of indexing with a negative slot.
        if (emptySpotIndex < 0)
        {
            throw new InvalidOperationException(string.Format(
                "Unable to track amplicon '{0}' at position {1}: no open amplicon slot remains (maximum {2}).",
                ampliconName, position, Constants.MaxNumOverlappingAmplicons));
        }

        _ampliconNamesPerPos[blockPositionIndex][emptySpotIndex] = ampliconName;
        _ampliconCountsPerPos[blockPositionIndex][emptySpotIndex] = 1;
    }
    else
    {
        // Already tracked at this position: one more observation.
        _ampliconCountsPerPos[blockPositionIndex][ampliconIndex]++;
    }
}
/// <summary>
/// Adds another allele's support to this one: support by direction, well-anchored support by
/// direction and, when the other allele carries amplicon names, support by amplicon.
/// </summary>
/// <param name="fromAllele">The allele whose support is added to this allele.</param>
/// <exception cref="System.InvalidOperationException">
/// Thrown when the other allele names an amplicon that is new to this allele and no open
/// amplicon slot remains to hold it.
/// </exception>
public void AddSupport(CandidateAllele fromAllele)
{
    for (var i = 0; i < SupportByDirection.Length; i++)
    {
        SupportByDirection[i] += fromAllele.SupportByDirection[i];
    }

    for (var i = 0; i < WellAnchoredSupportByDirection.Length; i++)
    {
        WellAnchoredSupportByDirection[i] += fromAllele.WellAnchoredSupportByDirection[i];
    }

    var incomingNames = fromAllele.SupportByAmplicon.AmpliconNames;
    if (incomingNames == null || incomingNames.Length == 0)
    {
        return;
    }

    if (SupportByAmplicon.AmpliconNames == null || SupportByAmplicon.AmpliconNames.Length == 0)
    {
        SupportByAmplicon = AmpliconCounts.GetEmptyAmpliconCounts();
    }

    for (var i = 0; i < incomingNames.Length; i++)
    {
        var fromAlleleName = incomingNames[i];
        if (fromAlleleName == null)
        {
            // An unused slot in the incoming arrays; there is nothing to merge for it.
            continue;
        }

        var fromAlleleCount = fromAllele.SupportByAmplicon.CountsForAmplicon[i];
        var indexData = SupportByAmplicon.GetAmpliconNameIndex(fromAlleleName);

        if (indexData.IndexForAmplicon > -1)
        {
            SupportByAmplicon.CountsForAmplicon[indexData.IndexForAmplicon] += fromAlleleCount;
        }
        else if (indexData.NextOpenSlot < 0)
        {
            // Fail with a clear message instead of indexing with a negative slot.
            throw new System.InvalidOperationException(string.Format(
                "Unable to add support for amplicon '{0}': no open amplicon slot remains.", fromAlleleName));
        }
        else
        {
            SupportByAmplicon.AmpliconNames[indexData.NextOpenSlot] = fromAlleleName;
            SupportByAmplicon.CountsForAmplicon[indexData.NextOpenSlot] += fromAlleleCount;
        }
    }
}
/// <summary>
/// Gives both amplicons the same nominal variant frequency (9%) and varies the depth of the
/// first amplicon from 110 to 2010 in steps of 100. Bias is expected exactly when the two
/// reported frequencies differ by 0.05 or more.
/// </summary>
public void HappyPath_VaryingDepthWithNoBias()
{
    double amp1Freq = 0.09;
    const double amp2Freq = 0.09;
    const int amp2Depth = 1000;

    for (var amp1Depth = 110; amp1Depth <= 2010; amp1Depth += 100)
    {
        var support = new AmpliconCounts()
        {
            AmpliconNames = twoAmpliconNames,
            CountsForAmplicon = new int[] { (int)(amp1Freq * amp1Depth), (int)(amp2Freq * amp2Depth) }
        };
        var coverage = new AmpliconCounts()
        {
            AmpliconNames = twoAmpliconNames,
            CountsForAmplicon = new int[] { amp1Depth, amp2Depth }
        };

        var results = ExecuteTest(support, coverage);

        var frequencyGap = Math.Abs(
            results.ResultsByAmpliconName["amp1"].Frequency - results.ResultsByAmpliconName["amp2"].Frequency);

        if (frequencyGap < 0.05)
        {
            // The amplicons agree closely enough that no bias should be reported.
            Assert.False(results.BiasDetected);
        }
        else
        {
            Assert.True(results.BiasDetected);
        }
    }
}
/// <summary>
/// Adds a candidate to this region. If an equal candidate is already tracked at the same
/// position, the new candidate's support is merged into it; otherwise the new candidate is
/// stored as-is. The region's max position is updated in either case.
/// </summary>
/// <param name="newCandidate">The candidate to add. Must not be a reference candidate and must lie in this region.</param>
/// <param name="trackOpenEnded">When true, candidates only match if their OpenOnLeft / OpenOnRight flags also agree.</param>
/// <param name="trackAmplicon">When true, per-amplicon support is merged into an existing match.</param>
/// <exception cref="ArgumentException">Thrown for reference candidates and for candidates outside this region.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when amplicon support has to be merged for an amplicon the existing candidate has
/// not seen yet and no open amplicon slot remains to hold it.
/// </exception>
public void AddCandidate(CandidateAllele newCandidate, bool trackOpenEnded, bool trackAmplicon)
{
    if (newCandidate.Type == AlleleCategory.Reference)
    {
        throw new ArgumentException(string.Format("Unable to add candidate '{0}': reference candidates are not tracked.", newCandidate));
    }

    if (!IsPositionInRegion(newCandidate.ReferencePosition))
    {
        throw new ArgumentException(string.Format("Unable to add candidate at position {0} to region '{1}'", newCandidate.ReferencePosition, Name));
    }

    var regionIndex = newCandidate.ReferencePosition - StartPosition;
    var existingCandidates = _candidateVariantsLookup[regionIndex];

    if (existingCandidates == null)
    {
        _candidateVariantsLookup[regionIndex] = new List<CandidateAllele> { newCandidate };
    }
    else
    {
        //TJD - this used to be a hash table, not a find,
        //where each variant's unique signature was the key.
        //this might be why we have seen a performance hit in the new pisces.
        var foundAtIndex = trackOpenEnded
            ? existingCandidates.FindIndex(c => c.Equals(newCandidate) && c.OpenOnLeft == newCandidate.OpenOnLeft && c.OpenOnRight == newCandidate.OpenOnRight)
            : existingCandidates.FindIndex(c => c.Equals(newCandidate));

        if (foundAtIndex == -1)
        {
            existingCandidates.Add(newCandidate);
        }
        else
        {
            var existingMatch = existingCandidates[foundAtIndex];

            for (var i = 0; i < existingMatch.SupportByDirection.Length; i++)
            {
                existingMatch.SupportByDirection[i] += newCandidate.SupportByDirection[i];
            }

            for (var i = 0; i < existingMatch.WellAnchoredSupportByDirection.Length; i++)
            {
                existingMatch.WellAnchoredSupportByDirection[i] += newCandidate.WellAnchoredSupportByDirection[i];
            }

            for (var i = 0; i < existingMatch.ReadCollapsedCountsMut.Length; i++)
            {
                existingMatch.ReadCollapsedCountsMut[i] += newCandidate.ReadCollapsedCountsMut[i];
            }

            if (trackAmplicon && (newCandidate.SupportByAmplicon.AmpliconNames != null))
            {
                MergeAmpliconSupport(existingMatch, newCandidate);
            }
        }
    }

    UpdateMaxPosition(newCandidate);
}

/// <summary>
/// Adds one observation to <paramref name="target"/> for every non-null amplicon name carried
/// by <paramref name="source"/>, allocating the target's amplicon arrays on first use.
/// </summary>
private static void MergeAmpliconSupport(CandidateAllele target, CandidateAllele source)
{
    // NOTE(review): each named amplicon contributes exactly one observation here; the
    // source's own CountsForAmplicon values are not consulted. That is presumably fine when
    // every incoming candidate comes from a single read - confirm against the callers.
    string[] ampliconList = source.SupportByAmplicon.AmpliconNames;

    for (var i = 0; i < ampliconList.Length; i++)
    {
        var ampliconName = ampliconList[i];
        if (ampliconName == null)
        {
            continue;
        }

        if (target.SupportByAmplicon.AmpliconNames == null || target.SupportByAmplicon.AmpliconNames.Length == 0)
        {
            target.SupportByAmplicon = AmpliconCounts.GetEmptyAmpliconCounts();
        }

        var indexData = target.SupportByAmplicon.GetAmpliconNameIndex(ampliconName);
        var ampliconIndex = indexData.IndexForAmplicon;
        var availableSlot = indexData.NextOpenSlot;

        if (ampliconIndex != -1)
        {
            // Already tracked on the existing candidate: one more observation.
            target.SupportByAmplicon.CountsForAmplicon[ampliconIndex]++;
            continue;
        }

        // New amplicon for the existing candidate, so it needs a free slot. Fail with a
        // clear message instead of indexing with a negative slot.
        if (availableSlot < 0)
        {
            throw new InvalidOperationException(string.Format(
                "Unable to add support for amplicon '{0}' to candidate '{1}': no open amplicon slot remains.",
                ampliconName, target));
        }

        target.SupportByAmplicon.AmpliconNames[availableSlot] = ampliconName;
        target.SupportByAmplicon.CountsForAmplicon[availableSlot] = 1;
    }
}
/// <summary>
/// This method looks for bias in the variant support / total coverage ratios, by amplicon.
/// This method is agnostic about where these support and coverage calculations come from, so it is up to the user
/// to make sure the counts are appropriate for the variant in question.
/// Note that for SNPs this is fairly straightforward, but for indels and MNVs it can become terribly difficult.
/// This method should be used with appropriate caution.
/// </summary>
/// <param name="supportByAmplicon">The support counts, for each named amplicon.</param>
/// <param name="coverageByAmplicon">The coverage counts, for each named amplicon.</param>
/// <param name="acceptanceCriteria">The minimum probability we accept for the variant being real, given the model.</param>
/// <param name="maxQScore">The max cap for a qscore. This parameter safeguards against reporting insanely high confidence, given the limitations of a simple model that only addresses sampling error.</param>
/// <returns>
/// The per-amplicon results and the overall bias flag, or null when there is no amplicon
/// information in the support, or when the coverage has no name array or one with fewer than
/// two entries.
/// </returns>
public static BiasResultsAcrossAmplicons CalculateAmpliconBias(AmpliconCounts supportByAmplicon, AmpliconCounts coverageByAmplicon, float acceptanceCriteria, int maxQScore)
{
    //if we have no amplicon information, don't worry about it.
    var supportNames = supportByAmplicon.AmpliconNames;
    if ((supportNames == null) || (supportNames.Length == 0) || (supportNames[0] == null))
    {
        return null;
    }

    //If we only have coverage on one amplicon, don't worry about it. There is no "bias" to detect.
    //We might later on, add a check to require extra evidence for variants only covered by one amplicon. TBD
    //A missing coverage name array is treated the same way, rather than being dereferenced.
    var coverageNames = coverageByAmplicon.AmpliconNames;
    if ((coverageNames == null) || (coverageNames.Length < 2))
    {
        return null;
    }

    var resultDict = new BiasResultsAcrossAmplicons()
    {
        ResultsByAmpliconName = new Dictionary<string, AmpliconBiasResult>()
    };

    //First pass: the observed frequency on each covered amplicon, remembering the highest one.
    var maxFreq = 0.0;
    for (int i = 0; i < coverageNames.Length; i++)
    {
        var name = coverageNames[i];
        if (name == null)
        {
            //a null name marks the end of the populated entries.
            break;
        }

        double support = supportByAmplicon.GetCountsForAmplicon(name);
        double coverage = coverageByAmplicon.CountsForAmplicon[i];
        double freq = (coverage > 0) ? support / coverage : 0;

        if (freq >= maxFreq)
        {
            resultDict.AmpliconWithCandidateArtifact = name;
            maxFreq = freq;
        }

        resultDict.ResultsByAmpliconName.Add(name, new AmpliconBiasResult()
        {
            Frequency = freq,
            Name = name,
            ObservedSupport = support,
            Coverage = coverage
        });
    }

    //Second pass: for each amplicon, would we have expected to see the variant at the highest
    //observed frequency, and if so, is what we actually saw plausible?
    bool shouldFailVariant = false;
    foreach (var resultForAmplicon in resultDict.ResultsByAmpliconName.Values)
    {
        double coverage = resultForAmplicon.Coverage;
        double support = resultForAmplicon.ObservedSupport;
        double freq = resultForAmplicon.Frequency;

        int qScore = 0;
        bool biasDetected = false;
        double expectedNumObservationsOfVariant = maxFreq * coverage;
        var pChanceItsReal = 1.0;

        if (expectedNumObservationsOfVariant < Constants.MinNumObservations)
        {
            qScore = maxQScore; //we'd never see it anyway. Seems fine.
        }
        else if ((expectedNumObservationsOfVariant <= support) || (freq > Constants.FreePassObservationFreq))
        {
            //we saw this variant quite a lot for this amplicon
            qScore = maxQScore; //it certainly seems to be in this amplicon!
        }
        else //we didn't see this variant much for this amplicon. Hm.... Perhaps it's not real...?
        {
            //What is the chance this variant exists but just happened not to show up much on this amplicon's reads?
            //Let's look at the chance that we observed it at "support" or less, given the estimated frequency.
            pChanceItsReal = Math.Max(0.0, Poisson.Cdf(support, expectedNumObservationsOfVariant));

            //biasProb = 1.0 - pChanceItsReal;
            var q = MathOperations.PtoQ(1.0 - pChanceItsReal);
            qScore = (int)q;
        }

        //if acceptanceCriteria = Q20, that's 1/100.
        //so, if we even have 1/100 chance of this happening, let's allow it.
        // Ie, a true variant would (generally) 50% of the time show up at its expected frequency.
        // Sometimes, it would show up less. At (1/100)% of the time, it only shows up at Z frequency.
        // So, if the observation is less likely than (1/100) for a real variant, we fail it.
        if (pChanceItsReal < acceptanceCriteria)
        {
            biasDetected = true;
            shouldFailVariant = true;
        }

        resultForAmplicon.ChanceItsReal = pChanceItsReal;
        resultForAmplicon.ConfidenceQScore = qScore;
        resultForAmplicon.BiasDetected = biasDetected;
        resultForAmplicon.ExpectedSupport = expectedNumObservationsOfVariant;
    }

    //The variant as a whole fails if any single amplicon showed bias.
    resultDict.BiasDetected = shouldFailVariant;

    return resultDict;
}