private StrandBiasResults ExecuteTest(Tuple <double, int> forwardStats, Tuple <double, int> reverseStats, Tuple <double, int> stitchedStats,
                                              int estimatedBaseCallQuality = 20, float threshold = 0.5f, StrandBiasModel model = StrandBiasModel.Poisson)
        {
            // Runs StrandBiasCalculator.Compute on a synthetic SNV and checks how the stitched support shows up in the results.
            // For each stats tuple, Item1 * Item2 (truncated to int) is used as the support and Item2 as the coverage.
            int forwardCount  = (int)(forwardStats.Item1 * forwardStats.Item2);
            int reverseCount  = (int)(reverseStats.Item1 * reverseStats.Item2);
            int stitchedCount = (int)(stitchedStats.Item1 * stitchedStats.Item2);

            // Ordered forward, reverse, stitched - the same order used for the coverage array below.
            var supportByDirection = new[] { forwardCount, reverseCount, stitchedCount };

            var variant = new CalledVariant(AlleleCategory.Snv);
            variant.TotalCoverageByDirection = new[] { forwardStats.Item2, reverseStats.Item2, stitchedStats.Item2 };

            StrandBiasCalculator.Compute(variant, supportByDirection, estimatedBaseCallQuality, threshold, model);

            // The reported forward/reverse support is expected to be the original support plus half of the stitched support.
            float halfOfStitched = (float)stitchedCount / 2;
            Assert.Equal(forwardCount + halfOfStitched, variant.StrandBiasResults.ForwardStats.Support);
            Assert.Equal(reverseCount + halfOfStitched, variant.StrandBiasResults.ReverseStats.Support);

            return variant.StrandBiasResults;
        }
        public void Compute()
        {
            // Based on Tamsen's original PValue test, just extended to our Compute method.
            // Each row is { total coverage, allele support, expected Q score }.
            var expectations = new List <int[]>
            {
                new[] { 100, 0, 0 },
                new[] { 100, 1, 2 },
                new[] { 100, 5, 24 },
                new[] { 200, 10, 43 },
                new[] { 500, 25, 98 },
                new[] { 5000, 250, 100 },
            };

            foreach (var row in expectations)
            {
                var coverage       = row[0];
                var alleleSupport  = row[1];
                var expectedQscore = row[2];

                var variant = new CalledVariant(AlleleCategory.Snv)
                {
                    Coordinate    = 1,
                    Reference     = "A",
                    Alternate     = "T",
                    TotalCoverage = coverage,
                    AlleleSupport = alleleSupport,
                };

                QualityCalculator.Compute(variant, 100, 20);

                Assert.Equal(expectedQscore, variant.Qscore);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Computes coverage for a variant that spans two datapoints by reconciling the coverage seen at each of them.
        /// When <paramref name="anchored"/> is true the two datapoints are averaged (integer division), otherwise the smaller one is taken.
        /// Per the original notes: insertions take the min of the preceding and trailing datapoints, while
        /// deletions and mnvs take the average of the first and last datapoint for the variant.
        /// Updates TotalCoverageByDirection (first two entries only), TotalCoverage and ReferenceSupport on the variant.
        /// </summary>
        private static void CalculateSpanning(CalledVariant variant, IStateManager alleleCountSource, int startPointPosition, int endPointPosition, bool anchored = true)
        {
            // One slot per read direction, all starting at zero.
            var coverageAtStart = new int[3];
            var coverageAtEnd   = new int[3];

            // For every direction, add up the counts of all coverage-contributing alleles at each datapoint.
            for (var d = 0; d < Constants.NumDirectionTypes; d++)
            {
                var direction = (DirectionType)d;

                foreach (var allele in Constants.CoverageContributingAlleles)
                {
                    coverageAtStart[d] += alleleCountSource.GetAlleleCount(startPointPosition, allele, direction);
                    coverageAtEnd[d]   += alleleCountSource.GetAlleleCount(endPointPosition, allele, direction);
                }
            }

            // Per-direction coverage feeds strand bias, so the stitched contribution of both book-ends
            // is redistributed to forward and reverse before the two are reconciled.
            RedistributeStitchedCoverage(coverageAtStart);
            RedistributeStitchedCoverage(coverageAtEnd);

            // Only the first two direction slots are written; the stitched slot is deliberately not assigned here
            // because its coverage has already been redistributed.
            for (var d = 0; d < 2; d++)
            {
                int reconciled;
                if (anchored)
                {
                    reconciled = (coverageAtStart[d] + coverageAtEnd[d]) / 2;
                }
                else
                {
                    reconciled = Math.Min(coverageAtStart[d], coverageAtEnd[d]);
                }

                variant.TotalCoverageByDirection[d] = reconciled;
            }

            // Total coverage is the sum over all direction slots.
            variant.TotalCoverage = variant.TotalCoverageByDirection.Sum();

            // Reference support is whatever coverage the allele does not account for, floored at zero.
            var uncalledCoverage = variant.TotalCoverage - variant.AlleleSupport;
            variant.ReferenceSupport = Math.Max(0, uncalledCoverage);
        }
        public void ComputeCoverage_Point_WithGappedMnvTakingSupport()
        {
            var snv = new CalledVariant(AlleleCategory.Snv)
            {
                Coordinate    = 1,
                Reference     = "A",
                Alternate     = "T",
                AlleleSupport = 10
            };

            // Alt allele counts at the variant's own coordinate.
            var altCounts = new AlleleCount()
            {
                AlleleType        = AlleleType.T,
                Coordinate        = 1,
                DirectionCoverage = new [] { 100, 101, 111 }
            };

            // Ref allele counts at the same coordinate: 21 + 32 + 0 = 53 in total.
            var refCounts = new AlleleCount()
            {
                AlleleType        = AlleleType.A,
                Coordinate        = 1,
                DirectionCoverage = new [] { 21, 32, 0 }
            };

            var alleleCounts = new List <AlleleCount>() { altCounts, refCounts };

            // Per-direction sums of the two alleles above.
            var expectedCoverage = new[] { 121, 133, 111 };

            // Of the 53 ref support, 50 is "taken" by a gapped MNV, so only 3 true ref support is expected.
            ComputeCoverageTest(snv, alleleCounts, expectedCoverage, expectedSnvRef: 3, takenRefSupport: 50);
        }
        public void ComputeCoverage_SupportGreaterThanCoverage()
        {
            // Support exceeding coverage is not expected in practice; this only checks that it is tolerated.
            var deletion = new CalledVariant(AlleleCategory.Deletion)
            {
                Coordinate = 1,
                Reference  = "ATCG",
                Alternate  = "A",
            };

            var countsAtStart = new AlleleCount()
            {
                Coordinate        = 2,
                DirectionCoverage = new [] { 1, 1, 1 }
            };
            var countsAtEnd = new AlleleCount()
            {
                Coordinate        = 4,
                DirectionCoverage = new [] { 1, 1, 1 }
            };

            var alleleCounts     = new List <AlleleCount>() { countsAtStart, countsAtEnd };
            var expectedCoverage = new [] { 8, 7, 0 };

            ComputeCoverageTest(deletion, alleleCounts, expectedCoverage, false, 100);

            // Reference support must not go negative; it should be reported as 0.
            Assert.Equal(0, deletion.ReferenceSupport);
        }
        public void ComputeCoverage_ZeroCoverage()
        {
            var deletion = new CalledVariant(AlleleCategory.Deletion)
            {
                Coordinate    = 1,
                Reference     = "ATCG",
                Alternate     = "A",
                AlleleSupport = 0
            };

            // Runs the coverage computation against all-zero allele counts.
            // The inputs are rebuilt on every invocation so the two runs below do not share instances.
            Action runWithZeroCounts = () =>
            {
                var zeroCounts = new List <AlleleCount>()
                {
                    new AlleleCount()
                    {
                        Coordinate        = 2,
                        DirectionCoverage = new [] { 0, 0, 0 }
                    },
                    new AlleleCount()
                    {
                        Coordinate        = 4,
                        DirectionCoverage = new [] { 0, 0, 0 }
                    }
                };
                var expectedCoverage = new [] { 0, 0, 0 };

                ComputeCoverageTest(deletion, zeroCounts, expectedCoverage, false);
            };

            // First pass: zero support and zero coverage.
            runWithZeroCounts();

            // Reference support should be 0
            Assert.Equal(0, deletion.ReferenceSupport);

            // Frequency should be 0 rather than failing
            Assert.Equal(0, deletion.Frequency);

            // Second pass: non-zero support but still zero allele counts
            // (not expected in practice, but it must be tolerated).
            deletion.AlleleSupport = 10;
            runWithZeroCounts();

            // Reference support should be 0
            Assert.Equal(0, deletion.ReferenceSupport);

            // Frequency should be 0 rather than failing
            Assert.Equal(0, deletion.Frequency);
        }
        public void ComputeCoverage_Point_HappyPath()
        {
            var snv = new CalledVariant(AlleleCategory.Snv)
            {
                Coordinate    = 1,
                Reference     = "A",
                Alternate     = "T",
                AlleleSupport = 10
            };

            // Alt allele at the variant's coordinate.
            var altCounts = new AlleleCount()
            {
                AlleleType        = AlleleType.T,
                Coordinate        = 1,
                DirectionCoverage = new [] { 100, 101, 111 }
            };

            // Ref allele: 1 + 2 + 0 = 3, which is the expected ref support.
            var refCounts = new AlleleCount()
            {
                AlleleType        = AlleleType.A,
                Coordinate        = 1,
                DirectionCoverage = new [] { 1, 2, 0 }
            };

            // Another non-ref allele: it should count towards coverage but not towards ref support.
            var otherCounts = new AlleleCount()
            {
                AlleleType        = AlleleType.C,
                Coordinate        = 1,
                DirectionCoverage = new [] { 5, 10, 1 }
            };

            var alleleCounts = new List <AlleleCount>() { altCounts, refCounts, otherCounts };

            // Per-direction sums of the three alleles. The stitched value (112) stays in place:
            // stitched coverage is not reallocated in the point-mutation case.
            var expectedCoverage = new [] { 106, 113, 112 };

            ComputeCoverageTest(snv, alleleCounts, expectedCoverage, expectedSnvRef: 3);
        }
Esempio n. 8
0
        /// <summary>
        /// Exercises MnvReallocator.BreakOffEdgeReferences on a non-mnv, an mnv with no edge references,
        /// and mnvs with leading, trailing, and both leading and trailing reference bases.
        /// Every scenario asserts that exactly one allele comes back; for the edge-reference scenarios that
        /// allele is the trimmed mnv and none of the would-be single-base reference alleles are present.
        /// </summary>
        public void BreakOffEdgeReferences()
        {
            // -----------------------------------------------
            // non-mnv should be returned as-is
            // -----------------------------------------------

            var nonMnv = new CalledVariant(AlleleCategory.Deletion)
            {
                Chromosome    = "chr1",
                Coordinate    = 1000,
                AlleleSupport = 10,
                Reference     = "TTCCTT",
                Alternate     = "T",
            };
            var brokenOutAlleles = MnvReallocator.BreakOffEdgeReferences(nonMnv);

            Assert.Equal(1, brokenOutAlleles.Count());
            Assert.Equal(1, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(nonMnv, x)));

            // -----------------------------------------------
            // mnv without leading or trailing refs should be returned as-is
            // -----------------------------------------------

            var alleleWithoutLeadingRefs = new CalledVariant(AlleleCategory.Mnv)
            {
                Chromosome    = "chr1",
                Coordinate    = 1000,
                AlleleSupport = 10,
                Reference     = "TTCCTT",
                Alternate     = "AAAAAA",
            };

            brokenOutAlleles = MnvReallocator.BreakOffEdgeReferences(alleleWithoutLeadingRefs);
            Assert.Equal(1, brokenOutAlleles.Count());
            Assert.Equal(1, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(alleleWithoutLeadingRefs, x)));

            // -----------------------------------------------
            // allele with two leading references should have them broken off, leaving just the rest of the mnv
            // (the assertions below expect a single result and no reference alleles in it)
            // -----------------------------------------------

            var alleleWithLeadingRefs = new CalledVariant(AlleleCategory.Mnv)
            {
                Chromosome    = "chr1",
                Coordinate    = 1000,
                AlleleSupport = 10,
                Reference     = "TTCCTT",
                Alternate     = "TTAAAA",
            };

            // The two leading reference bases, used only to assert that they are NOT in the result.
            var expectedLeadingRef1 = new CalledReference()
            {
                Chromosome    = "chr1",
                Coordinate    = 1000,
                AlleleSupport = 10,
                Reference     = "T",
                Alternate     = "T",
            };
            var expectedLeadingRef2 = new CalledReference()
            {
                Chromosome    = "chr1",
                Coordinate    = 1001,
                AlleleSupport = 10,
                Reference     = "T",
                Alternate     = "T",
            };
            // Remaining mnv starts two positions later, after the trimmed leading bases.
            var expectedRemainingMnv = new CalledVariant(AlleleCategory.Mnv)
            {
                Chromosome    = "chr1",
                Coordinate    = 1002,
                AlleleSupport = 10,
                Reference     = "CCTT",
                Alternate     = "AAAA",
            };

            brokenOutAlleles = MnvReallocator.BreakOffEdgeReferences(alleleWithLeadingRefs);
            Assert.Equal(1, brokenOutAlleles.Count());
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedLeadingRef1, x)));
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedLeadingRef2, x)));
            Assert.Equal(1, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedRemainingMnv, x)));

            // -----------------------------------------------
            // allele with two trailing references should have them broken off, leaving just the rest of the mnv
            // -----------------------------------------------

            var alleleWithTrailingRefs = new CalledVariant(AlleleCategory.Mnv)
            {
                Chromosome    = "chr1",
                Coordinate    = 1000,
                AlleleSupport = 10,
                Reference     = "TTCCTT",
                Alternate     = "AAAATT",
            };
            // The two trailing reference bases, used only to assert that they are NOT in the result.
            var expectedTrailingRef1 = new CalledReference()
            {
                Chromosome    = "chr1",
                Coordinate    = 1004,
                AlleleSupport = 10,
                Reference     = "T",
                Alternate     = "T",
            };
            var expectedTrailingRef2 = new CalledReference()
            {
                Chromosome    = "chr1",
                Coordinate    = 1005,
                AlleleSupport = 10,
                Reference     = "T",
                Alternate     = "T",
            };

            // Trimming trailing bases keeps the original start coordinate.
            expectedRemainingMnv = new CalledVariant(AlleleCategory.Mnv)
            {
                Chromosome    = "chr1",
                Coordinate    = 1000,
                AlleleSupport = 10,
                Reference     = "TTCC",
                Alternate     = "AAAA",
            };

            brokenOutAlleles = MnvReallocator.BreakOffEdgeReferences(alleleWithTrailingRefs);
            Assert.Equal(1, brokenOutAlleles.Count());
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedTrailingRef1, x)));
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedTrailingRef2, x)));
            Assert.Equal(1, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedRemainingMnv, x)));


            // -----------------------------------------------
            // allele with two leading references and two trailing references should have all four broken off, leaving just the rest of the mnv
            // (the assertions below expect a single result and none of the four reference alleles in it)
            // -----------------------------------------------

            var alleleWithLeadingAndTrailingRefs = new CalledVariant(AlleleCategory.Mnv)
            {
                Chromosome    = "chr1",
                Coordinate    = 1000,
                AlleleSupport = 10,
                Reference     = "TTCCTT",
                Alternate     = "TTAATT",
            };

            expectedRemainingMnv = new CalledVariant(AlleleCategory.Mnv)
            {
                Chromosome    = "chr1",
                Coordinate    = 1002,
                AlleleSupport = 10,
                Reference     = "CC",
                Alternate     = "AA",
            };

            brokenOutAlleles = MnvReallocator.BreakOffEdgeReferences(alleleWithLeadingAndTrailingRefs);
            Assert.Equal(1, brokenOutAlleles.Count());
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedLeadingRef1, x)));
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedLeadingRef2, x)));
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedTrailingRef1, x)));
            Assert.Equal(0, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedTrailingRef2, x)));
            Assert.Equal(1, brokenOutAlleles.Count(x => VerifyCalledAlleleMatch(expectedRemainingMnv, x)));
        }
        /// <summary>
        /// Builds called alleles from tab-delimited text lines.
        /// A line starting with "Chromosome" is treated as a header and defines the column names for the
        /// data lines that follow it; every other line produces one allele whose fields are filled in
        /// column by column. Columns with unrecognised names are ignored.
        /// Numeric fields are parsed with the invariant culture so the same input yields the same values
        /// regardless of the machine's regional settings.
        /// </summary>
        /// <param name="candidates">Header and data lines, tab-delimited.</param>
        /// <returns>
        /// One entry per data line, in input order. Alleles whose genotype is HomozygousRef or RefLikeNoCall
        /// are passed through Map before being added; all others are added as-is.
        /// </returns>
        /// <remarks>
        /// A data line with fewer fields than the current header throws an IndexOutOfRangeException.
        /// Data lines seen before any header produce an allele with no fields set.
        /// </remarks>
        public static List <BaseCalledAllele> LoadCalledVariantsArray(string[] candidates)
        {
            var variants   = new List <BaseCalledAllele>();
            var columns    = new string[0];
            var alleleType = typeof(BaseCalledAllele);

            // Input is machine-readable text, so never let the current culture decide how "0.5" or "-1" is read.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            foreach (var line in candidates)
            {
                var tokens = line.Split('\t');

                // Header detection is a plain character comparison, not a linguistic one.
                if (line.StartsWith("Chromosome", StringComparison.Ordinal))
                {
                    columns = tokens;
                    continue;
                }

                var variant = new CalledVariant(AlleleCategory.Snv); // note doesn't matter what the call type is, vcf writer doesnt care
                for (var i = 0; i < columns.Length; i++)
                {
                    var column    = columns[i];
                    var dataValue = tokens[i];

                    switch (column)
                    {
                    case "Chromosome":
                    case "Reference":
                    case "Alternate":
                        // Reflection is only needed for the columns that map straight onto a property.
                        alleleType.GetProperty(column).SetValue(variant, dataValue);
                        break;

                    case "Coordinate":
                    case "Qscore":
                    case "TotalCoverage":
                    case "AlleleSupport":
                        alleleType.GetProperty(column).SetValue(variant, int.Parse(dataValue, invariant));
                        break;

                    case "FractionNoCalls":
                        alleleType.GetProperty(column).SetValue(variant, float.Parse(dataValue, invariant));
                        break;

                    case "StrandBiasScore":
                        variant.StrandBiasResults.GATKBiasScore = float.Parse(dataValue, invariant);
                        break;

                    case "Filters":
                        // Comma-separated filter names; empty entries are skipped.
                        foreach (var filter in dataValue.Split(','))
                        {
                            if (!string.IsNullOrEmpty(filter))
                            {
                                var filterEnum = (FilterType)Enum.Parse(typeof(FilterType), filter, true);
                                variant.Filters.Add(filterEnum);
                            }
                        }
                        break;

                    case "Genotype":
                        variant.Genotype = (Genotype)Enum.Parse(typeof(Genotype), dataValue, true);
                        break;
                    }
                }

                if (variant.Genotype == Genotype.HomozygousRef || variant.Genotype == Genotype.RefLikeNoCall)
                {
                    variants.Add(Map(variant));
                }
                else
                {
                    variants.Add(variant);
                }
            }

            return(variants);
        }
Esempio n. 10
0
        public void ComputeCoverage_Spanning_HappyPath()
        {
            // Original author's notes on the two datapoints used throughout this test (not verifiable from this method alone):
            //   { 10, 100, 20 }  -> after redistribution = 100, 550, 0
            //   { 30, 50, 200 }  -> after redistribution = 650, 750, 0

            // ---- Deletion: datapoints at coordinates 2 and 4, expect the internal average ----
            var deletion = new CalledVariant(AlleleCategory.Deletion)
            {
                Coordinate = 1,
                Reference  = "ATCG",
                Alternate  = "A"
            };

            var deletionCounts = new List <AlleleCount>()
            {
                new AlleleCount()
                {
                    Coordinate        = 2,
                    DirectionCoverage = new[] { 10, 100, 20 }
                },
                new AlleleCount()
                {
                    Coordinate        = 4,
                    DirectionCoverage = new[] { 30, 50, 200 }
                }
            };
            var deletionExpected = new[] { 375, 650, 0 };

            ComputeCoverageTest(deletion, deletionCounts, deletionExpected);

            // ---- Insertion: datapoints at coordinates 1 and 2, expect the min ----
            var insertion = new CalledVariant(AlleleCategory.Insertion)
            {
                Coordinate = 1,
                Reference  = "A",
                Alternate  = "ATCG"
            };

            var insertionCounts = new List <AlleleCount>()
            {
                new AlleleCount()
                {
                    Coordinate        = 1,
                    DirectionCoverage = new[] { 10, 100, 20 }
                },
                new AlleleCount()
                {
                    Coordinate        = 2,
                    DirectionCoverage = new[] { 30, 50, 200 }
                }
            };
            var insertionExpected = new[] { 100, 550, 0 };

            ComputeCoverageTest(insertion, insertionCounts, insertionExpected);

            // ---- Mnv: datapoints at coordinates 1 and 4, expect the internal average (same values as the deletion) ----
            var mnv = new CalledVariant(AlleleCategory.Mnv)
            {
                Coordinate = 1,
                Reference  = "CATG",
                Alternate  = "ATCA"
            };

            var mnvCounts = new List <AlleleCount>()
            {
                new AlleleCount()
                {
                    Coordinate        = 1,
                    DirectionCoverage = new[] { 10, 100, 20 }
                },
                new AlleleCount()
                {
                    Coordinate        = 4,
                    DirectionCoverage = new[] { 30, 50, 200 }
                }
            };
            var mnvExpected = new[] { 375, 650, 0 };

            ComputeCoverageTest(mnv, mnvCounts, mnvExpected);
        }