/// <summary>
/// Runs <c>StrandBiasCalculator.Compute</c> on a freshly built SNV and returns the resulting strand bias results.
/// For each stats tuple, Item2 is used as the coverage for that direction and
/// Item1 * Item2 (truncated to an int) as the support for that direction.
/// Before returning, asserts that the forward and reverse support reported by the calculator
/// each equal the original directional support plus half of the stitched support.
/// </summary>
/// <param name="forwardStats">Forward direction stats (Item1 multiplied by Item2 gives support; Item2 is coverage).</param>
/// <param name="reverseStats">Reverse direction stats, same layout as <paramref name="forwardStats"/>.</param>
/// <param name="stitchedStats">Stitched direction stats, same layout as <paramref name="forwardStats"/>.</param>
/// <param name="estimatedBaseCallQuality">Passed through to the calculator unchanged.</param>
/// <param name="threshold">Passed through to the calculator unchanged.</param>
/// <param name="model">Passed through to the calculator unchanged.</param>
/// <returns>The <c>StrandBiasResults</c> stored on the variant by the calculator.</returns>
private StrandBiasResults ExecuteTest(Tuple<double, int> forwardStats, Tuple<double, int> reverseStats,
    Tuple<double, int> stitchedStats, int estimatedBaseCallQuality = 20, float threshold = 0.5f,
    StrandBiasModel model = StrandBiasModel.Poisson)
{
    // Coverage per direction, in forward / reverse / stitched order.
    var coverageByDirection = new int[] { forwardStats.Item2, reverseStats.Item2, stitchedStats.Item2 };

    // Support per direction; the cast truncates any fractional read.
    var forwardSupport = (int)(forwardStats.Item1 * forwardStats.Item2);
    var reverseSupport = (int)(reverseStats.Item1 * reverseStats.Item2);
    var stitchedSupport = (int)(stitchedStats.Item1 * stitchedStats.Item2);
    var supportByDirection = new int[] { forwardSupport, reverseSupport, stitchedSupport };

    var variant = new CalledVariant(AlleleCategory.Snv);
    variant.TotalCoverageByDirection = coverageByDirection;

    StrandBiasCalculator.Compute(variant, supportByDirection, estimatedBaseCallQuality, threshold, model);

    // Each of forward and reverse is expected to have picked up half of the stitched support.
    var stitchedHalf = (float)stitchedSupport / 2;
    var results = variant.StrandBiasResults;
    Assert.Equal(forwardSupport + stitchedHalf, results.ForwardStats.Support);
    Assert.Equal(reverseSupport + stitchedHalf, variant.StrandBiasResults.ReverseStats.Support);

    return variant.StrandBiasResults;
}
/// <summary>
/// Checks the Q-score that <c>QualityCalculator.Compute</c> writes onto a variant
/// for a small table of coverage / variant-call combinations.
/// </summary>
public void Compute()
{
    // Based on Tamsen's original PValue test, just extended to our Compute method.
    // Each row is { coverage, variant calls, expected Q-score }.
    var expectedQScoreCases = new List<int[]>
    {
        new[] { 100, 0, 0 },
        new[] { 100, 1, 2 },
        new[] { 100, 5, 24 },
        new[] { 200, 10, 43 },
        new[] { 500, 25, 98 },
        new[] { 5000, 250, 100 },
    };

    foreach (var testCase in expectedQScoreCases)
    {
        var coverage = testCase[0];
        var variantCalls = testCase[1];
        var expectedQScore = testCase[2];

        var variant = new CalledVariant(AlleleCategory.Snv)
        {
            Coordinate = 1,
            Reference = "A",
            Alternate = "T",
            TotalCoverage = coverage,
            AlleleSupport = variantCalls,
        };

        QualityCalculator.Compute(variant, 100, 20);

        Assert.Equal(expectedQScore, variant.Qscore);
    }
}
/// <summary>
/// Calculation for spanning variants requires looking at two datapoints and reconciling the coverage between the two.
/// When <paramref name="anchored"/> is true, the forward and reverse coverage is the (integer) average of the
/// two datapoints; otherwise it is the minimum of the two.
/// Per the original author's note: insertions take the min of the preceding and trailing datapoints,
/// while deletions and mnvs take the average of the first and last datapoint for the variant.
/// </summary>
/// <param name="variant">Variant whose TotalCoverageByDirection, TotalCoverage and ReferenceSupport are updated.</param>
/// <param name="alleleCountSource">Source queried for allele counts at both positions.</param>
/// <param name="startPointPosition">Position of the first datapoint.</param>
/// <param name="endPointPosition">Position of the second datapoint.</param>
/// <param name="anchored">True to average the two datapoints, false to take their minimum.</param>
private static void CalculateSpanning(CalledVariant variant, IStateManager alleleCountSource, int startPointPosition,
    int endPointPosition, bool anchored = true)
{
    // Scratch buffers for the coverage tally; one slot per read direction.
    var coverageAtStart = new[] { 0, 0, 0 };
    var coverageAtEnd = new[] { 0, 0, 0 };

    // Tally coverage per direction over every coverage-contributing allele type, at both datapoints.
    for (var dir = 0; dir < Constants.NumDirectionTypes; dir++)
    {
        var direction = (DirectionType)dir;

        foreach (var alleleType in Constants.CoverageContributingAlleles)
        {
            coverageAtStart[dir] += alleleCountSource.GetAlleleCount(startPointPosition, alleleType, direction);
            coverageAtEnd[dir] += alleleCountSource.GetAlleleCount(endPointPosition, alleleType, direction);
        }
    }

    // Coverage by strand direction feeds strand bias, so the stitched contribution of each book-end
    // is redistributed to forward and reverse before the two book-ends are reconciled.
    RedistributeStitchedCoverage(coverageAtStart);
    RedistributeStitchedCoverage(coverageAtEnd);

    // Only the first two direction slots are written: the stitched slot is intentionally left alone
    // for a spanned variant because its coverage has already been redistributed.
    for (var dir = 0; dir < 2; dir++)
    {
        var atStart = coverageAtStart[dir];
        var atEnd = coverageAtEnd[dir];

        int reconciled;
        if (anchored)
        {
            reconciled = (atStart + atEnd) / 2;
        }
        else
        {
            reconciled = Math.Min(atStart, atEnd);
        }

        variant.TotalCoverageByDirection[dir] = reconciled;
    }

    // Total coverage is the sum across the directions; reference support is clamped so it never goes negative.
    variant.TotalCoverage = variant.TotalCoverageByDirection.Sum();
    variant.ReferenceSupport = Math.Max(0, variant.TotalCoverage - variant.AlleleSupport);
}
/// <summary>
/// Point-mutation coverage where part of the reference support has been taken by a gapped MNV.
/// </summary>
public void ComputeCoverage_Point_WithGappedMnvTakingSupport()
{
    var snv = new CalledVariant(AlleleCategory.Snv)
    {
        Coordinate = 1,
        Reference = "A",
        Alternate = "T",
        AlleleSupport = 10
    };

    // Alternate allele counts; coverage should only take into account the coordinate we're at.
    var altCounts = new AlleleCount
    {
        AlleleType = AlleleType.T,
        Coordinate = 1,
        DirectionCoverage = new[] { 100, 101, 111 }
    };

    // Reference allele counts (21 + 32 + 0 = 53 total); again only the coordinate we're at matters.
    var refCounts = new AlleleCount
    {
        AlleleType = AlleleType.A,
        Coordinate = 1,
        DirectionCoverage = new[] { 21, 32, 0 }
    };

    var alleleCounts = new List<AlleleCount> { altCounts, refCounts };
    var expectedCoverage = new[] { 121, 133, 111 };

    // Although the total ref support above is 53, 50 of it is "taken" by a gapped MNV,
    // so only 3 true ref support is expected.
    ComputeCoverageTest(snv, alleleCounts, expectedCoverage, expectedSnvRef: 3, takenRefSupport: 50);
}
/// <summary>
/// Guards the case where support exceeds coverage: reference support must come out as zero.
/// </summary>
public void ComputeCoverage_SupportGreaterThanCoverage()
{
    // This shouldn't happen, but it must not blow up if it does.
    var deletion = new CalledVariant(AlleleCategory.Deletion)
    {
        Coordinate = 1,
        Reference = "ATCG",
        Alternate = "A",
    };

    var alleleCounts = new List<AlleleCount>
    {
        new AlleleCount { Coordinate = 2, DirectionCoverage = new[] { 1, 1, 1 } },
        new AlleleCount { Coordinate = 4, DirectionCoverage = new[] { 1, 1, 1 } }
    };
    var expectedCoverage = new[] { 8, 7, 0 };

    // NOTE(review): the trailing positional arguments are passed exactly as before; presumably
    // 100 is the variant support that exceeds coverage - confirm against ComputeCoverageTest.
    ComputeCoverageTest(deletion, alleleCounts, expectedCoverage, false, 100);

    // Reference support should be 0
    Assert.Equal(0, deletion.ReferenceSupport);
}
/// <summary>
/// Runs the coverage computation with all-zero allele counts, first with zero allele support and then
/// with non-zero allele support, and checks that reference support and frequency are both zero each time.
/// </summary>
public void ComputeCoverage_ZeroCoverage()
{
    var deletion = new CalledVariant(AlleleCategory.Deletion)
    {
        Coordinate = 1,
        Reference = "ATCG",
        Alternate = "A",
        AlleleSupport = 0
    };

    // The same scenario is executed twice, so it is wrapped in a delegate.
    // A fresh list of counts is built on every invocation.
    Action runZeroCoverageScenario = () =>
    {
        var zeroCounts = new List<AlleleCount>
        {
            new AlleleCount { Coordinate = 2, DirectionCoverage = new[] { 0, 0, 0 } },
            new AlleleCount { Coordinate = 4, DirectionCoverage = new[] { 0, 0, 0 } }
        };

        ComputeCoverageTest(deletion, zeroCounts, new[] { 0, 0, 0 }, false);
    };

    runZeroCoverageScenario();

    // Reference support should be 0
    Assert.Equal(0, deletion.ReferenceSupport);
    // Frequency should be 0 (and computing it must not throw)
    Assert.Equal(0, deletion.Frequency);

    // Now try the case where the allele support is non-zero but the
    // allele counts are zero (shouldn't happen, but it must not blow up).
    deletion.AlleleSupport = 10;
    runZeroCoverageScenario();

    // Reference support should be 0
    Assert.Equal(0, deletion.ReferenceSupport);
    // Frequency should be 0 (and computing it must not throw)
    Assert.Equal(0, deletion.Frequency);
}
/// <summary>
/// Point-mutation coverage with an alternate allele, the reference allele and one other non-reference allele
/// all present at the variant's coordinate.
/// </summary>
public void ComputeCoverage_Point_HappyPath()
{
    var snv = new CalledVariant(AlleleCategory.Snv)
    {
        Coordinate = 1,
        Reference = "A",
        Alternate = "T",
        AlleleSupport = 10
    };

    // Alternate allele; coverage should only take into account the coordinate we're at.
    var altCounts = new AlleleCount
    {
        AlleleType = AlleleType.T,
        Coordinate = 1,
        DirectionCoverage = new[] { 100, 101, 111 }
    };

    // Ref allele (1 + 2 + 0 = 3, matching the expected reference support below).
    var refCounts = new AlleleCount
    {
        AlleleType = AlleleType.A,
        Coordinate = 1,
        DirectionCoverage = new[] { 1, 2, 0 }
    };

    // Coverage should consider other non-ref alleles, but ref support should not.
    var otherCounts = new AlleleCount
    {
        AlleleType = AlleleType.C,
        Coordinate = 1,
        DirectionCoverage = new[] { 5, 10, 1 }
    };

    var alleleCounts = new List<AlleleCount> { altCounts, refCounts, otherCounts };

    // Per-direction sums of the three alleles above. Stitched coverage is not reallocated
    // here in the point-mutation case.
    var expectedCoverage = new[] { 106, 113, 112 };

    ComputeCoverageTest(snv, alleleCounts, expectedCoverage, expectedSnvRef: 3);
}
/// <summary>
/// Exercises <c>MnvReallocator.BreakOffEdgeReferences</c> with a non-mnv, an mnv without edge references,
/// and mnvs with leading, trailing, and both leading and trailing reference bases.
/// In every scenario exactly one allele is expected back; where edge references exist, only the trimmed
/// mnv is expected and none of the edge reference alleles.
/// </summary>
public void BreakOffEdgeReferences()
{
    // -----------------------------------------------
    // non-mnv should be returned as-is
    // -----------------------------------------------
    var deletion = new CalledVariant(AlleleCategory.Deletion)
    {
        Chromosome = "chr1",
        Coordinate = 1000,
        AlleleSupport = 10,
        Reference = "TTCCTT",
        Alternate = "T",
    };

    var deletionResult = MnvReallocator.BreakOffEdgeReferences(deletion);

    Assert.Equal(1, deletionResult.Count());
    Assert.Equal(1, deletionResult.Count(x => VerifyCalledAlleleMatch(deletion, x)));

    // -----------------------------------------------
    // mnv without leading or trailing refs should be returned as-is
    // -----------------------------------------------
    var mnvWithoutEdgeRefs = new CalledVariant(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        Coordinate = 1000,
        AlleleSupport = 10,
        Reference = "TTCCTT",
        Alternate = "AAAAAA",
    };

    var withoutEdgeRefsResult = MnvReallocator.BreakOffEdgeReferences(mnvWithoutEdgeRefs);

    Assert.Equal(1, withoutEdgeRefsResult.Count());
    Assert.Equal(1, withoutEdgeRefsResult.Count(x => VerifyCalledAlleleMatch(mnvWithoutEdgeRefs, x)));

    // -----------------------------------------------
    // allele with two leading references: the leading refs are trimmed away and
    // only the rest of the mnv is returned (no separate reference alleles)
    // -----------------------------------------------
    var mnvWithLeadingRefs = new CalledVariant(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        Coordinate = 1000,
        AlleleSupport = 10,
        Reference = "TTCCTT",
        Alternate = "TTAAAA",
    };

    var leadingRefAt1000 = new CalledReference
    {
        Chromosome = "chr1",
        Coordinate = 1000,
        AlleleSupport = 10,
        Reference = "T",
        Alternate = "T",
    };

    var leadingRefAt1001 = new CalledReference
    {
        Chromosome = "chr1",
        Coordinate = 1001,
        AlleleSupport = 10,
        Reference = "T",
        Alternate = "T",
    };

    var remainingAfterLeading = new CalledVariant(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        Coordinate = 1002,
        AlleleSupport = 10,
        Reference = "CCTT",
        Alternate = "AAAA",
    };

    var leadingResult = MnvReallocator.BreakOffEdgeReferences(mnvWithLeadingRefs);

    Assert.Equal(1, leadingResult.Count());
    Assert.Equal(0, leadingResult.Count(x => VerifyCalledAlleleMatch(leadingRefAt1000, x)));
    Assert.Equal(0, leadingResult.Count(x => VerifyCalledAlleleMatch(leadingRefAt1001, x)));
    Assert.Equal(1, leadingResult.Count(x => VerifyCalledAlleleMatch(remainingAfterLeading, x)));

    // -----------------------------------------------
    // allele with two trailing references should have them broken off, leaving just the rest of the mnv
    // -----------------------------------------------
    var mnvWithTrailingRefs = new CalledVariant(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        Coordinate = 1000,
        AlleleSupport = 10,
        Reference = "TTCCTT",
        Alternate = "AAAATT",
    };

    var trailingRefAt1004 = new CalledReference
    {
        Chromosome = "chr1",
        Coordinate = 1004,
        AlleleSupport = 10,
        Reference = "T",
        Alternate = "T",
    };

    var trailingRefAt1005 = new CalledReference
    {
        Chromosome = "chr1",
        Coordinate = 1005,
        AlleleSupport = 10,
        Reference = "T",
        Alternate = "T",
    };

    var remainingAfterTrailing = new CalledVariant(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        Coordinate = 1000,
        AlleleSupport = 10,
        Reference = "TTCC",
        Alternate = "AAAA",
    };

    var trailingResult = MnvReallocator.BreakOffEdgeReferences(mnvWithTrailingRefs);

    Assert.Equal(1, trailingResult.Count());
    Assert.Equal(0, trailingResult.Count(x => VerifyCalledAlleleMatch(trailingRefAt1004, x)));
    Assert.Equal(0, trailingResult.Count(x => VerifyCalledAlleleMatch(trailingRefAt1005, x)));
    Assert.Equal(1, trailingResult.Count(x => VerifyCalledAlleleMatch(remainingAfterTrailing, x)));

    // -----------------------------------------------
    // allele with two leading and two trailing references: all four edge refs are trimmed away and
    // only the rest of the mnv is returned (no separate reference alleles)
    // -----------------------------------------------
    var mnvWithBothEdgeRefs = new CalledVariant(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        Coordinate = 1000,
        AlleleSupport = 10,
        Reference = "TTCCTT",
        Alternate = "TTAATT",
    };

    var remainingAfterBoth = new CalledVariant(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        Coordinate = 1002,
        AlleleSupport = 10,
        Reference = "CC",
        Alternate = "AA",
    };

    var bothEdgesResult = MnvReallocator.BreakOffEdgeReferences(mnvWithBothEdgeRefs);

    Assert.Equal(1, bothEdgesResult.Count());
    Assert.Equal(0, bothEdgesResult.Count(x => VerifyCalledAlleleMatch(leadingRefAt1000, x)));
    Assert.Equal(0, bothEdgesResult.Count(x => VerifyCalledAlleleMatch(leadingRefAt1001, x)));
    Assert.Equal(0, bothEdgesResult.Count(x => VerifyCalledAlleleMatch(trailingRefAt1004, x)));
    Assert.Equal(0, bothEdgesResult.Count(x => VerifyCalledAlleleMatch(trailingRefAt1005, x)));
    Assert.Equal(1, bothEdgesResult.Count(x => VerifyCalledAlleleMatch(remainingAfterBoth, x)));
}
/// <summary>
/// Builds called alleles from tab-delimited text lines.
/// A line starting with "Chromosome" is treated as a header and defines the column names for the lines
/// that follow it; every other line is treated as one data row and produces one allele.
/// Recognized columns are Chromosome, Reference, Alternate, Coordinate, Qscore, TotalCoverage, AlleleSupport,
/// FractionNoCalls, StrandBiasScore, Filters (comma separated) and Genotype; any other column is ignored.
/// Numeric values are parsed with the invariant culture so the result does not depend on the
/// culture of the machine running the tests.
/// </summary>
/// <param name="candidates">Header and data lines, tab-delimited.</param>
/// <returns>
/// One entry per data line. Rows whose genotype is HomozygousRef or RefLikeNoCall are passed through
/// <c>Map</c> before being added; all other rows are added as the parsed variant.
/// </returns>
public static List<BaseCalledAllele> LoadCalledVariantsArray(string[] candidates)
{
    var variants = new List<BaseCalledAllele>();
    var columns = new string[0];

    // Hoisted out of the loops: neither value changes between rows.
    var alleleType = typeof(BaseCalledAllele);
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    foreach (var line in candidates)
    {
        var tokens = line.Split('\t');

        // Header line: remember the column layout and move on. Ordinal comparison because
        // this is a fixed machine-readable marker, not linguistic text.
        if (line.StartsWith("Chromosome", StringComparison.Ordinal))
        {
            columns = tokens;
            continue;
        }

        // note doesn't matter what the call type is, vcf writer doesnt care
        var variant = new CalledVariant(AlleleCategory.Snv);

        for (var i = 0; i < columns.Length; i++)
        {
            var column = columns[i];
            var dataValue = tokens[i];

            switch (column)
            {
                case "Chromosome":
                case "Reference":
                case "Alternate":
                    // Set through reflection, as before, so setter accessibility does not matter.
                    alleleType.GetProperty(column).SetValue(variant, dataValue);
                    break;

                case "Coordinate":
                case "Qscore":
                case "TotalCoverage":
                case "AlleleSupport":
                    alleleType.GetProperty(column).SetValue(variant, Int32.Parse(dataValue, invariant));
                    break;

                case "FractionNoCalls":
                    // Invariant culture: "0.05" must not be misread under comma-decimal cultures.
                    alleleType.GetProperty(column).SetValue(variant, float.Parse(dataValue, invariant));
                    break;

                case "StrandBiasScore":
                    variant.StrandBiasResults.GATKBiasScore = float.Parse(dataValue, invariant);
                    break;

                case "Filters":
                    foreach (var filter in dataValue.Split(','))
                    {
                        // Empty entries (e.g. an empty Filters cell) are skipped.
                        if (string.IsNullOrEmpty(filter))
                        {
                            continue;
                        }

                        variant.Filters.Add((FilterType)Enum.Parse(typeof(FilterType), filter, true));
                    }
                    break;

                case "Genotype":
                    variant.Genotype = (Genotype)Enum.Parse(typeof(Genotype), dataValue, true);
                    break;
            }
        }

        if (variant.Genotype == Genotype.HomozygousRef || variant.Genotype == Genotype.RefLikeNoCall)
        {
            variants.Add(Map(variant));
        }
        else
        {
            variants.Add(variant);
        }
    }

    return variants;
}
/// <summary>
/// Spanning-variant coverage for a deletion, an insertion and an mnv, using the same pair of
/// direction coverages at the two datapoints each time. The deletion and the mnv expect the
/// average of the two datapoints; the insertion expects the minimum.
/// </summary>
public void ComputeCoverage_Spanning_HappyPath()
{
    // --- Deletion: expect the average of the two datapoints ---
    var deletion = new CalledVariant(AlleleCategory.Deletion)
    {
        Coordinate = 1,
        Reference = "ATCG",
        Alternate = "A"
    };

    var deletionCounts = new List<AlleleCount>
    {
        // redist = 100, 550, 0 (original author's note)
        new AlleleCount { Coordinate = 2, DirectionCoverage = new[] { 10, 100, 20 } },
        // redist = 650, 750, 0 (original author's note)
        new AlleleCount { Coordinate = 4, DirectionCoverage = new[] { 30, 50, 200 } }
    };
    var expectedAverage = new[] { 375, 650, 0 };

    ComputeCoverageTest(deletion, deletionCounts, expectedAverage);

    // --- Insertion: expect the min of the two datapoints ---
    var insertion = new CalledVariant(AlleleCategory.Insertion)
    {
        Coordinate = 1,
        Reference = "A",
        Alternate = "ATCG"
    };

    var insertionCounts = new List<AlleleCount>
    {
        // redist = 100, 550, 0 (original author's note)
        new AlleleCount { Coordinate = 1, DirectionCoverage = new[] { 10, 100, 20 } },
        // redist = 650, 750, 0 (original author's note)
        new AlleleCount { Coordinate = 2, DirectionCoverage = new[] { 30, 50, 200 } }
    };
    var expectedMin = new[] { 100, 550, 0 };

    ComputeCoverageTest(insertion, insertionCounts, expectedMin);

    // --- Mnv: expect the average of the first and last datapoints ---
    var mnv = new CalledVariant(AlleleCategory.Mnv)
    {
        Coordinate = 1,
        Reference = "CATG",
        Alternate = "ATCA"
    };

    var mnvCounts = new List<AlleleCount>
    {
        // redist = 100, 550, 0 (original author's note)
        new AlleleCount { Coordinate = 1, DirectionCoverage = new[] { 10, 100, 20 } },
        // redist = 650, 750, 0 (original author's note)
        new AlleleCount { Coordinate = 4, DirectionCoverage = new[] { 30, 50, 200 } }
    };
    var expectedMnvAverage = new[] { 375, 650, 0 };

    ComputeCoverageTest(mnv, mnvCounts, expectedMnvAverage);
}