/// <summary>
/// Wraps a <see cref="VcfNeighborhood"/> for calling: creates the genotype calculator from the
/// supplied calling parameters, initialises the accepted/rejected variant lists and the used
/// reference-count lookup, finalises the neighborhood's range of interest, and captures the
/// reference bases covering that range.
/// </summary>
/// <param name="vcfNeighborhood">Neighborhood to wrap; its variant sites are ordered and its range of interest is set here.</param>
/// <param name="variantCallingParams">Source of the genotyping thresholds and of <c>MaxQScore</c>.</param>
/// <param name="chrReference">
/// Optional chromosome reference. When it (or its <c>Sequence</c>) is null, a run of 'R'
/// placeholder characters of the same length is stored instead of real reference bases.
/// </param>
public CallableNeighborhood(VcfNeighborhood vcfNeighborhood, VariantCallingParameters variantCallingParams, ChrReference chrReference = null)
        {
            // Genotype calculator configured from the caller's thresholds.
            _nbhdGTcalculator = GenotypeCreator.CreateGenotypeCalculator(
                variantCallingParams.PloidyModel,
                variantCallingParams.MinimumFrequencyFilter,
                variantCallingParams.MinimumCoverage,
                variantCallingParams.DiploidSNVThresholdingParameters,
                variantCallingParams.DiploidINDELThresholdingParameters,
                variantCallingParams.AdaptiveGenotypingParameters,
                variantCallingParams.MinimumGenotypeQScore,
                variantCallingParams.MaximumGenotypeQScore,
                variantCallingParams.TargetLODFrequency);

            // Bookkeeping state.
            _vcfNeighborhood = vcfNeighborhood;
            _acceptedPhasedVariants = new List<CalledAllele>();
            _rejectedPhasedVariants = new List<CalledAllele>();
            UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();
            MaxQScore = variantCallingParams.MaximumVariantQScore;

            // The range of interest is only final once the sites are ordered and the range is set.
            vcfNeighborhood.OrderVariantSitesByFirstTrueStartPosition();
            vcfNeighborhood.SetRangeOfInterest();

            var lastPositionWithLookAhead = vcfNeighborhood.LastPositionOfInterestWithLookAhead;
            var firstPosition = vcfNeighborhood.FirstPositionOfInterest;
            var basesNeeded = lastPositionWithLookAhead - firstPosition;

            // Copy out just the bases we need so the chromosome reference itself is not retained.
            // A missing reference genome is tolerated by substituting 'R' placeholders.
            // NOTE(review): the "- 1" presumably converts a 1-based position to a 0-based index - confirm.
            var referenceUnavailable = chrReference == null || chrReference.Sequence == null;
            NbhdReferenceSequenceSubstring = referenceUnavailable
                ? new string('R', basesNeeded)
                : chrReference.Sequence.Substring(firstPosition - 1, basesNeeded);
        }
// Example no. 2
// 0
 /// <summary>
 /// Creates an allele caller from the given configuration and optional collaborators.
 /// </summary>
 /// <param name="config">Caller configuration; also supplies the genotype calculator.</param>
 /// <param name="intervalSet">Optional interval set; stored as given (may be null).</param>
 /// <param name="variantCollapser">Optional variant collapser; stored as given (may be null).</param>
 /// <param name="coverageCalculator">Optional coverage calculator; a new <c>CoverageCalculator</c> is used when null.</param>
 public AlleleCaller(VariantCallerConfig config, ChrIntervalSet intervalSet = null,
                     IVariantCollapser variantCollapser = null, ICoverageCalculator coverageCalculator = null)
 {
     _config = config;
     _intervalSet = intervalSet;
     _collapser = variantCollapser;

     // Fall back to the default coverage calculator when the caller did not inject one.
     if (coverageCalculator != null)
     {
         _coverageCalculator = coverageCalculator;
     }
     else
     {
         _coverageCalculator = new CoverageCalculator();
     }

     _genotypeCalculator = config.GenotypeCalculator;
 }
// Example no. 3
// 0
        /// <summary>
        /// Creates a neighborhood on <paramref name="refName"/> seeded with two variant sites.
        /// The first site is added with the first base of its own VCF reference allele, the second
        /// with <paramref name="interveningRef"/>; the neighborhood ID is then set.
        /// </summary>
        /// <param name="variantCallingParams">Source of the thresholds used to create the genotype calculator.</param>
        /// <param name="refName">Reference name stored for this neighborhood.</param>
        /// <param name="vs1">First variant site.</param>
        /// <param name="vs2">Second variant site.</param>
        /// <param name="interveningRef">Reference string passed along when adding <paramref name="vs2"/>.</param>
        public VcfNeighborhood(VariantCallingParameters variantCallingParams, string refName, VariantSite vs1, VariantSite vs2, string interveningRef)
        {
            _nbhdGTcalculator = GenotypeCreator.CreateGenotypeCalculator(
                variantCallingParams.PloidyModel,
                variantCallingParams.MinimumFrequencyFilter,
                variantCallingParams.MinimumCoverage,
                variantCallingParams.DiploidThresholdingParameters,
                variantCallingParams.MinimumGenotpyeQScore,
                variantCallingParams.MaximumGenotpyeQScore);

            // Empty containers first: the sites are added to them just below.
            VcfVariantSites = new List<VariantSite>();
            _referenceName = refName;
            _acceptedPhasedVariants = new List<CalledAllele>();
            _rejectedPhasedVariants = new List<CalledAllele>();
            UsedRefCountsLookup = new Dictionary<int, int>();

            // The first site contributes only the leading base of its reference allele.
            var leadingReferenceBase = vs1.VcfReferenceAllele.Substring(0, 1);
            AddVariantSite(vs1, leadingReferenceBase);
            AddVariantSite(vs2, interveningRef);

            SetID();
        }
        /// <summary>
        /// Runs the supplied genotype calculator through five multi-allelic scenarios, each with
        /// three candidate alleles at a total coverage of 1000, and checks the genotype assigned
        /// to every allele plus the single allele returned for pruning.
        /// </summary>
        /// <param name="GTC">Genotype calculator under test; its <c>MinDepthToGenotype</c> is set to 100 before each scenario.</param>
        private void ExecuteDiploidMultiAllelicSiteGenotypeTest(IGenotypeCalculator GTC)
        {
            //test cases:
            // (1) SNP + indel + indel
            // (2) indel + SNP + SNP
            // (3) 3 indels (OK, one is low VF)
            // (4) 3 indels - excused ploidy violation
            // (5) 3 SNPs - NOT excused ploidy violation

            const double coverage = 1000;
            const int expectedNumAllelesToPrune = 1;

            // (1) SNP + indel + indel
            // should be 1/2 with the lowest freq thrown out
            var alleles = CreateMultiAllelicSiteTestAlleles(
                new List<float> { 0.40F, 0.60F, 0.90F },
                new List<float> { 0.60F, 0.40F, 0.10F },
                new List<string> { "A", "A", "ACT" },
                new List<string> { "C", "AGGG", "A" },
                coverage);
            alleles[1].Type = AlleleCategory.Insertion;
            alleles[2].Type = AlleleCategory.Deletion;

            GTC.MinDepthToGenotype = 100;
            var allelesToPrune = GTC.SetGenotypes(alleles);

            Assert.Equal(expectedNumAllelesToPrune, allelesToPrune.Count);
            AssertGenotypeAndNoFiltersOnAll(Genotype.HeterozygousAlt1Alt2, alleles);

            Assert.Equal("ACT", allelesToPrune[0].ReferenceAllele);
            Assert.Equal("A", allelesToPrune[0].AlternateAllele);
            Assert.Equal(0.10F, allelesToPrune[0].Frequency);


            // (2) indel + SNP + SNP
            // should be 1/2 with the lowest freq thrown out
            alleles = CreateMultiAllelicSiteTestAlleles(
                new List<float> { 0.40F, 0.20F, 0.20F },
                new List<float> { 0.60F, 0.10F, 0.40F },
                new List<string> { "A", "A", "A" },
                new List<string> { "ACCAT", "G", "C" },
                coverage);
            alleles[0].Type = AlleleCategory.Insertion;

            GTC.MinDepthToGenotype = 100;
            allelesToPrune = GTC.SetGenotypes(alleles);

            Assert.Equal(expectedNumAllelesToPrune, allelesToPrune.Count);
            AssertGenotypeAndNoFiltersOnAll(Genotype.HeterozygousAlt1Alt2, alleles);

            Assert.Equal("A", allelesToPrune[0].ReferenceAllele);
            Assert.Equal("G", allelesToPrune[0].AlternateAllele);
            Assert.Equal(0.10F, allelesToPrune[0].Frequency);


            // (3) 3 indels (OK)
            // should be 1/2 with the lowest freq thrown out
            alleles = CreateMultiAllelicSiteTestAlleles(
                new List<float> { 0.40F, 0.90F, 0.60F },
                new List<float> { 0.60F, 0.10F, 0.40F },
                new List<string> { "A", "ACT", "A" },
                new List<string> { "ACCAT", "A", "CC" },
                coverage);
            alleles[0].Type = AlleleCategory.Insertion;
            alleles[1].Type = AlleleCategory.Deletion;
            alleles[2].Type = AlleleCategory.Insertion;

            GTC.MinDepthToGenotype = 100;
            allelesToPrune = GTC.SetGenotypes(alleles);

            Assert.Equal(expectedNumAllelesToPrune, allelesToPrune.Count);
            AssertGenotypeAndNoFiltersOnAll(Genotype.HeterozygousAlt1Alt2, alleles);

            Assert.Equal("ACT", allelesToPrune[0].ReferenceAllele);
            Assert.Equal("A", allelesToPrune[0].AlternateAllele);
            Assert.Equal(0.10F, allelesToPrune[0].Frequency);


            // (4) 3 indels - excused ploidy violation,
            // b/c it's possible to have these together in a diploid individual
            // should be 1/2 with the lowest freq thrown out
            alleles = CreateMultiAllelicSiteTestAlleles(
                new List<float> { 0.60F, 0.60F, 0.60F },
                new List<float> { 0.31F, 0.30F, 0.31F },
                new List<string> { "A", "ACT", "A" },
                new List<string> { "ACCAT", "A", "AC" },
                coverage);
            alleles[0].Type = AlleleCategory.Insertion;
            alleles[1].Type = AlleleCategory.Deletion;
            alleles[2].Type = AlleleCategory.Insertion;

            GTC.MinDepthToGenotype = 100;
            allelesToPrune = GTC.SetGenotypes(alleles);

            Assert.Equal(expectedNumAllelesToPrune, allelesToPrune.Count);
            AssertGenotypeAndNoFiltersOnAll(Genotype.HeterozygousAlt1Alt2, alleles);

            Assert.Equal("ACT", allelesToPrune[0].ReferenceAllele);
            Assert.Equal("A", allelesToPrune[0].AlternateAllele);
            Assert.Equal(0.30F, allelesToPrune[0].Frequency);


            // (5) 3 SNPs - NOT excused ploidy violation,
            // b/c it's NOT possible to have these together in a diploid individual
            // should be ./. with the lowest freq thrown out
            alleles = CreateMultiAllelicSiteTestAlleles(
                new List<float> { 0.01F, 0.01F, 0.01F },
                new List<float> { 0.31F, 0.30F, 0.31F },
                new List<string> { "A", "A", "A" },
                new List<string> { "C", "T", "G" },
                coverage);
            alleles[0].Type = AlleleCategory.Snv;
            alleles[1].Type = AlleleCategory.Snv;
            alleles[2].Type = AlleleCategory.Snv;

            GTC.MinDepthToGenotype = 100;
            allelesToPrune = GTC.SetGenotypes(alleles);

            Assert.Equal(expectedNumAllelesToPrune, allelesToPrune.Count);
            foreach (var allele in alleles)
            {
                // Here every allele is expected to be a no-call carrying the multi-allelic filter.
                Assert.Equal(Genotype.Alt12LikeNoCall, allele.Genotype);
                Assert.Equal(FilterType.MultiAllelicSite, allele.Filters[0]);
            }

            Assert.Equal("A", allelesToPrune[0].ReferenceAllele);
            Assert.Equal("T", allelesToPrune[0].AlternateAllele);
            Assert.Equal(0.30F, allelesToPrune[0].Frequency);
        }

        /// <summary>
        /// Builds one passing test variant per entry of the parallel input lists, setting its
        /// allele strings, total coverage, and alt/ref support (frequency * coverage, truncated to int).
        /// </summary>
        private static List<CalledAllele> CreateMultiAllelicSiteTestAlleles(List<float> refFrequencies, List<float> altFrequencies,
                                                                            List<string> refAlleles, List<string> altAlleles, double coverage)
        {
            var alleles = new List<CalledAllele>();

            for (int i = 0; i < altAlleles.Count; i++)
            {
                var variant = TestHelper.CreatePassingVariant(false);
                variant.AlleleSupport    = (int)(altFrequencies[i] * coverage);
                variant.TotalCoverage    = (int)coverage;
                variant.ReferenceSupport = (int)(refFrequencies[i] * coverage);
                variant.AlternateAllele  = altAlleles[i];
                variant.ReferenceAllele  = refAlleles[i];
                alleles.Add(variant);
            }

            return alleles;
        }

        /// <summary>
        /// Asserts that every allele carries <paramref name="expectedGenotype"/> and has no filters.
        /// </summary>
        private static void AssertGenotypeAndNoFiltersOnAll(Genotype expectedGenotype, List<CalledAllele> alleles)
        {
            foreach (var allele in alleles)
            {
                Assert.Equal(expectedGenotype, allele.Genotype);
                Assert.Empty(allele.Filters);
            }
        }