예제 #1
0
        /// <summary>
        /// Computes the unweighted conditional probability P(A|B) for the supplied "B" site.
        /// </summary>
        /// <param name="site">The "B" variant site; it is validated with CheckVariantSiteTracked first.</param>
        /// <returns>P(A and B) / P(B), or 0 when P(B) is smaller than ApproximatelyZero.</returns>
        public double GetProbOfAGivenB(VariantSite site)
        {
            // P(A|B) = P(A in a cluster with B) / P(B)
            // Both terms come from CalculateProbability applied to the raw support counters.
            CheckVariantSiteTracked(site);

            var jointProbability    = CalculateProbability(_supportOfAAndB[site]);
            var marginalProbability = CalculateProbability(_supportOfB[site]);

            // NOTE(review): diagnostics are written straight to the console on every call.
            Console.WriteLine("Prob of A and B: " + jointProbability);
            Console.WriteLine("Prob of B: " + marginalProbability);

            var ratio = jointProbability / marginalProbability;

            // A vanishing P(B) makes the ratio meaningless, so 0 is reported instead.
            return marginalProbability < ApproximatelyZero ? 0 : ratio;
        }
예제 #2
0
 /// <summary>
 /// Adds <paramref name="amount"/> to the value stored for <paramref name="site"/>,
 /// treating a missing entry as zero.
 /// </summary>
 /// <param name="dictionary">Counter table to update in place.</param>
 /// <param name="site">Key whose counter is incremented.</param>
 /// <param name="amount">Value added to the counter.</param>
 private static void AddSupport(Dictionary <VariantSite, double> dictionary, VariantSite site, double amount)
 {
     double runningTotal;
     if (!dictionary.TryGetValue(site, out runningTotal))
     {
         // No entry yet: start counting from zero.
         runningTotal = 0;
     }

     dictionary[site] = runningTotal + amount;
 }
예제 #3
0
 /// <summary>
 /// Verifies that <paramref name="site"/> has an entry in the support-of-B table.
 /// </summary>
 /// <param name="site">The variant site to look up.</param>
 /// <exception cref="Exception">Thrown when the site has no entry in _supportOfB.</exception>
 private void CheckVariantSiteTracked(VariantSite site)
 {
     if (_supportOfB.ContainsKey(site))
     {
         return;
     }

     var message = string.Format(
         "Support for VariantSite '{0}' is not being tracked in relation to VariantSite '{1}'",
         site,
         VariantA);

     throw new Exception(message);
 }
예제 #4
0
        /// <summary>
        /// Creates a neighborhood for the named reference and seeds it with two variant sites.
        /// </summary>
        /// <param name="nbhdNum">Number handed to SetID once both sites have been added.</param>
        /// <param name="refName">Stored as the reference name.</param>
        /// <param name="vs1">First site added.</param>
        /// <param name="vs2">Second site added.</param>
        public VcfNeighborhood(int nbhdNum, string refName, VariantSite vs1, VariantSite vs2)
        {
            _referenceName  = refName;
            VcfVariantSites = new List<VariantSite>();

            // Sites are added in order (vs1, then vs2) before the ID is set.
            foreach (var seedSite in new[] { vs1, vs2 })
            {
                AddVariantSite(seedSite);
            }

            SetID(nbhdNum);
        }
예제 #5
0
 /// <summary>
 /// Stores a deep copy of <paramref name="variantSite"/> and bumps the passing or
 /// non-passing counter according to its IsPassing flag.
 /// </summary>
 /// <param name="variantSite">Site to copy into VcfVariantSites.</param>
 public void AddVariantSite(VariantSite variantSite)
 {
     var siteCopy = variantSite.DeepCopy();
     VcfVariantSites.Add(siteCopy);

     if (!variantSite.IsPassing)
     {
         NonPassingVariants++;
         return;
     }

     PassingVariants++;
 }
예제 #6
0
        /// <summary>
        /// Computes P(A|B) for the supplied "B" site from the weighted support counters.
        /// </summary>
        /// <param name="site">The "B" variant site; it is validated with CheckVariantSiteTracked first.</param>
        /// <returns>Weighted P(A and B) / weighted P(B), or 0 when weighted P(B) is smaller than ApproximatelyZero.</returns>
        public double GetWeightedProbOfAGivenB(VariantSite site)
        {
            CheckVariantSiteTracked(site);

            var marginalProbability = CalculateProbability(_weightedSupportOfB[site]);
            var jointProbability    = CalculateProbability(_weightedSupportOfAAndB[site]);

            var ratio = jointProbability / marginalProbability;

            // A vanishing P(B) makes the ratio meaningless, so 0 is reported instead.
            return marginalProbability < ApproximatelyZero ? 0 : ratio;
        }
예제 #7
0
        /// <summary>
        /// Totals the number of veads, across all vead groups, whose representative vead has a
        /// site result matching <paramref name="vs"/> by position, reference allele and alternate allele.
        /// </summary>
        /// <param name="vs">The variant site to match against.</param>
        /// <returns>Sum of NumVeads over every matching site result.</returns>
        private int GetVeadCountsForVariantSite(VariantSite vs)
        {
            var total = 0;

            foreach (var veadGroup in _veadGroups)
            {
                // Every vead in a group is the same, so the representative stands in for all of them.
                var representative = veadGroup.RepresentativeVead;

                var matchingReads = representative.SiteResults.Where(read =>
                    vs.VcfReferencePosition == read.VcfReferencePosition &&
                    (vs.VcfReferenceAllele == read.VcfReferenceAllele) &&
                    (vs.VcfAlternateAllele == read.VcfAlternateAllele));

                // Each matching site result contributes the full size of the group.
                total += matchingReads.Sum(read => veadGroup.NumVeads);
            }

            return total;
        }
예제 #8
0
        /// <summary>
        /// Creates a neighborhood with its own genotype calculator and seeds it with two variant sites.
        /// </summary>
        /// <param name="variantCallingParams">Source of the settings passed to GenotypeCreator.CreateGenotypeCalculator.</param>
        /// <param name="nbhdNum">Number handed to SetID once both sites have been added.</param>
        /// <param name="refName">Stored as the reference name.</param>
        /// <param name="vs1">First site; added together with the first character of its reference allele.</param>
        /// <param name="vs2">Second site; added together with <paramref name="interveningRef"/>.</param>
        /// <param name="interveningRef">Reference text supplied alongside the second site.</param>
        public VcfNeighborhood(VariantCallingParameters variantCallingParams, int nbhdNum,
                               string refName, VariantSite vs1, VariantSite vs2, string interveningRef)
        {
            _nbhdGTcalculator = GenotypeCreator.CreateGenotypeCalculator(
                variantCallingParams.PloidyModel,
                variantCallingParams.MinimumFrequencyFilter,
                variantCallingParams.MinimumCoverage,
                variantCallingParams.DiploidSNVThresholdingParameters,
                variantCallingParams.DiploidINDELThresholdingParameters,
                variantCallingParams.MinimumGenotypeQScore,
                variantCallingParams.MaximumGenotypeQScore,
                variantCallingParams.TargetLODFrequency);

            _referenceName          = refName;
            VcfVariantSites         = new List<VariantSite>();
            UsedRefCountsLookup     = new Dictionary<int, SuckedUpRefRecord>();
            _acceptedPhasedVariants = new List<CalledAllele>();
            _rejectedPhasedVariants = new List<CalledAllele>();

            // The first site contributes only the leading base of its reference allele.
            var leadingRefBase = vs1.VcfReferenceAllele.Substring(0, 1);
            AddVariantSite(vs1, leadingRefBase);
            AddVariantSite(vs2, interveningRef);

            SetID(nbhdNum);
        }
예제 #9
0
        /// <summary>
        /// Creates a phasing result for variant "A" with zeroed support counters for every
        /// "other" ("B") variant in <paramref name="variantGroup"/>.
        /// </summary>
        /// <param name="vsA">Stored as VariantA.</param>
        /// <param name="variantGroup">The "B" sites; each one gets an entry in all four counter tables.</param>
        /// <param name="totalNumClusters">Stored as the total number of clusters.</param>
        public VariantPhasingResult(VariantSite vsA, IEnumerable <VariantSite> variantGroup, int totalNumClusters)
        {
            _totalNumClusters = totalNumClusters;
            VariantA          = vsA;

            _supportOfB             = new Dictionary<VariantSite, double>();
            _supportOfAAndB         = new Dictionary<VariantSite, double>();
            _weightedSupportOfB     = new Dictionary<VariantSite, double>();
            _weightedSupportOfAAndB = new Dictionary<VariantSite, double>();

            // Listed in the order the entries must be created for each site.
            var counterTables = new Dictionary<VariantSite, double>[]
            {
                _supportOfB,
                _supportOfAAndB,
                _weightedSupportOfB,
                _weightedSupportOfAAndB
            };

            foreach (var otherSite in variantGroup)
            {
                foreach (var counterTable in counterTables)
                {
                    counterTable.Add(otherSite, 0);
                }
            }
        }
예제 #10
0
 /// <summary>
 /// Tells whether the most recently added site sits at a different reference position
 /// than <paramref name="variantSite"/>.
 /// </summary>
 /// <param name="variantSite">Site whose VcfReferencePosition is compared.</param>
 /// <returns>True when the two positions differ.</returns>
 public bool LastPositionIsNotMatch(VariantSite variantSite)
 {
     var lastSite = VcfVariantSites.Last();
     var lastPosition = lastSite.VcfReferencePosition;

     return lastPosition != variantSite.VcfReferencePosition;
 }
예제 #11
0
        /// <summary>
        /// Extracts one phased variant (MNV) per cluster, files it as accepted or rejected, tracks the
        /// reference counts absorbed by accepted MNVs, and finally recomputes the reference support of
        /// every candidate variant.
        /// </summary>
        /// <param name="clusters">Clusters to process; a null or empty sequence makes the call a no-op.</param>
        /// <param name="qNoiselevel">Passed through to PhasedVariantExtractor.Extract.</param>
        /// <param name="maxQscore">Passed through to PhasedVariantExtractor.Extract.</param>
        /// <param name="crushNbhd">
        /// When true (or when the genotype calculator's ploidy model is Diploid), FirstPositionOfInterest
        /// is passed to the extractor as the anchor position; otherwise -1 is passed.
        /// </param>
        public void CreateMnvsFromClusters(IEnumerable <ICluster> clusters, int qNoiselevel, int maxQscore, bool crushNbhd = false)
        {
            // Any() stops at the first element instead of counting the whole sequence.
            if (clusters == null || !clusters.Any())
            {
                return;
            }

            // Both arrays are assigned by DepthAtSites, so no placeholder allocation is needed.
            int[] depthAtSites;
            int[] nocallsAtSites;
            DepthAtSites(clusters, out depthAtSites, out nocallsAtSites);

            Logger.WriteToLog("Creating MNVs from clusters.");

            int anchorPosition = -1;

            //if we are crushing the vcf, or in diploid mode, always report all phased alleles through the nbhd, starting at the first position of interest. (ie, the first position we started phasing on)
            //If we are in somatic mode or uncrushed mode, we just report the variants at the loci we find them on (normal Pisces)
            if (crushNbhd || _nbhdGTcalculator.PloidyModel == Pisces.Domain.Types.PloidyModel.Diploid)
            {
                anchorPosition = FirstPositionOfInterest;
            }

            foreach (var cluster in clusters)
            {
                CalledAllele mnv;

                var clusterConsensus = cluster.GetConsensusSites();

                Logger.WriteToLog(cluster.Name + "\tVariantSites\t" + VariantSite.ArrayToString(clusterConsensus));
                Logger.WriteToLog(cluster.Name + "\tVariantPositions\t" + VariantSite.ArrayToPositions(clusterConsensus));

                var referenceRemoval = PhasedVariantExtractor.Extract(out mnv, clusterConsensus,
                                                                      ReferenceSequence, depthAtSites, nocallsAtSites, cluster.CountsAtSites, ReferenceName, qNoiselevel, maxQscore, anchorPosition);

                if ((mnv.Type != Pisces.Domain.Types.AlleleCategory.Reference) && mnv.AlleleSupport != 0)
                {
                    Logger.WriteToLog(cluster.Name + "mnv accepted:\t" + mnv.ToString());
                    AddAcceptedPhasedVariant(mnv);

                    //keep track of reference calls sucked into MNVs.
                    //We will need to subtract this from the ref counts when we write out the final vcf.
                    foreach (var refPosition in referenceRemoval.Keys)
                    {
                        // Single lookup: reuse the existing record, or register a new one
                        // claimed by the first MNV that touches this position.
                        SuckedUpRefRecord suckedUpRefRecord;
                        if (!UsedRefCountsLookup.TryGetValue(refPosition, out suckedUpRefRecord))
                        {
                            suckedUpRefRecord = new SuckedUpRefRecord()
                            {
                                Counts = 0, AlleleThatClaimedIt = mnv
                            };
                            UsedRefCountsLookup.Add(refPosition, suckedUpRefRecord);
                        }

                        suckedUpRefRecord.Counts += referenceRemoval[refPosition].Counts;
                    }
                }
                else if (mnv.TotalCoverage != 0) //dont add empty stuff..
                {
                    Logger.WriteToLog("mnv rejected:\t" + mnv.ToString());
                    AddRejectedPhasedVariant(mnv);
                }
            }

            foreach (var phasedVariant in CandidateVariants)
            {
                var calledPhasedVariant = phasedVariant as CalledAllele;
                if (calledPhasedVariant == null)
                {
                    continue;
                }

                calledPhasedVariant.ReferenceSupport = phasedVariant.TotalCoverage - phasedVariant.AlleleSupport;

                // Subtract absorbed reference counts unless this very allele is the one that claimed them.
                SuckedUpRefRecord claimedRecord;
                if (UsedRefCountsLookup.TryGetValue(phasedVariant.ReferencePosition, out claimedRecord) && (claimedRecord.AlleleThatClaimedIt != phasedVariant))
                {
                    calledPhasedVariant.ReferenceSupport = calledPhasedVariant.ReferenceSupport - claimedRecord.Counts;
                }

                // Reference support is never reported as negative.
                calledPhasedVariant.ReferenceSupport = Math.Max(0, calledPhasedVariant.ReferenceSupport);
            }
        }
예제 #12
0
 /// <summary>
 /// Appends <paramref name="refSinceLastVariant"/> to ReferenceSequence and stores a deep copy
 /// of <paramref name="variantSite"/>.
 /// </summary>
 /// <param name="variantSite">Site to copy into VcfVariantSites.</param>
 /// <param name="refSinceLastVariant">Text appended to the end of ReferenceSequence.</param>
 public void AddVariantSite(VariantSite variantSite, string refSinceLastVariant)
 {
     ReferenceSequence = ReferenceSequence + refSinceLastVariant;

     var siteCopy = variantSite.DeepCopy();
     VcfVariantSites.Add(siteCopy);
 }
예제 #13
0
        /// <summary>
        /// Extracts one phased variant (MNV) per cluster, files it as accepted or rejected, tracks the
        /// reference counts absorbed by accepted MNVs, and finally recomputes the reference support of
        /// every candidate variant.
        /// </summary>
        /// <param name="clusters">Clusters to process; a null or empty sequence makes the call a no-op.</param>
        /// <param name="qNoiselevel">Passed through to PhasedVariantExtractor.Extract.</param>
        /// <param name="maxQscore">Passed through to PhasedVariantExtractor.Extract.</param>
        /// <param name="crushNbhd">
        /// When true, the reference position of the first consensus site of the first cluster is used
        /// as the anchor position for every extraction; otherwise -1 is passed.
        /// </param>
        public void AddMnvsFromClusters(IEnumerable <ICluster> clusters, int qNoiselevel, int maxQscore, bool crushNbhd = false)
        {
            // Any() stops at the first element instead of counting the whole sequence.
            if (clusters == null || !clusters.Any())
            {
                return;
            }

            var depthAtSites = DepthAtSites(clusters);

            Logger.WriteToLog("Creating MNVs from clusters.");

            int anchorPosition = -1;

            foreach (var cluster in clusters)
            {
                CalledAllele mnv;

                var clusterConsensus = cluster.GetConsensusSites();

                // The anchor is fixed once, from the first cluster seen, and reused afterwards.
                if (crushNbhd && (anchorPosition == -1))
                {
                    anchorPosition = clusterConsensus.First().VcfReferencePosition;
                }

                Logger.WriteToLog(cluster.Name + "\tVariantSites\t" + VariantSite.ArrayToString(clusterConsensus));
                Logger.WriteToLog(cluster.Name + "\tVariantPositions\t" + VariantSite.ArrayToPositions(clusterConsensus));

                var referenceRemoval = PhasedVariantExtractor.Extract(out mnv, clusterConsensus,
                                                                      ReferenceSequence, depthAtSites.ToList(), cluster.CountsAtSites, ReferenceName, qNoiselevel, maxQscore, anchorPosition);

                if ((mnv.Type != Pisces.Domain.Types.AlleleCategory.Reference) && mnv.AlleleSupport != 0)
                {
                    Logger.WriteToLog(cluster.Name + "mnv accepted:\t" + mnv.ToString());
                    AddAcceptedPhasedVariant(mnv);

                    //keep track of reference calls sucked into MNVs.
                    //We will need to subtract this from the ref counts when we write out the final vcf.
                    foreach (var refPosition in referenceRemoval.Keys)
                    {
                        if (!UsedRefCountsLookup.ContainsKey(refPosition))
                        {
                            UsedRefCountsLookup.Add(refPosition, 0);
                        }

                        UsedRefCountsLookup[refPosition] += referenceRemoval[refPosition];
                    }
                }
                else if (mnv.TotalCoverage != 0) //dont add empty stuff..
                {
                    Logger.WriteToLog("mnv rejected:\t" + mnv.ToString());
                    AddRejectedPhasedVariant(mnv);
                }
            }

            foreach (var phasedVariant in CandidateVariants)
            {
                var calledPhasedVariant = phasedVariant as CalledAllele;
                if (calledPhasedVariant == null)
                {
                    continue;
                }

                calledPhasedVariant.ReferenceSupport = phasedVariant.TotalCoverage - phasedVariant.AlleleSupport;

                // Subtract the reference counts that accepted MNVs absorbed at this coordinate.
                if (UsedRefCountsLookup.ContainsKey(phasedVariant.Coordinate))
                {
                    calledPhasedVariant.ReferenceSupport = calledPhasedVariant.ReferenceSupport - UsedRefCountsLookup[phasedVariant.Coordinate];
                }

                // Reference support is never reported as negative.
                calledPhasedVariant.ReferenceSupport = Math.Max(0, calledPhasedVariant.ReferenceSupport);
            }
        }
예제 #14
0
 /// <summary>
 /// Renders SiteResults as a string using VariantSite.ArrayToString.
 /// </summary>
 /// <returns>The string produced by VariantSite.ArrayToString for SiteResults.</returns>
 public string ToVariantSequence()
 {
     var sequenceText = VariantSite.ArrayToString(SiteResults);
     return sequenceText;
 }
예제 #15
0
 /// <summary>
 /// Records one joint observation of A and B for <paramref name="site"/>: the unweighted counter
 /// grows by 1 and the weighted counter grows by <paramref name="support"/>.
 /// </summary>
 /// <param name="site">The "B" site whose joint counters are updated.</param>
 /// <param name="support">Amount added to the weighted counter.</param>
 public void AddSupportForAandB(VariantSite site, double support)
 {
     // The unweighted table counts observations, so each call contributes exactly one.
     const double singleObservation = 1;

     AddSupport(_supportOfAAndB, site, singleObservation);
     AddSupport(_weightedSupportOfAAndB, site, support);
 }
예제 #16
0
        /// <summary>
        /// Builds the phasing result for <paramref name="variantSiteA"/> by walking every cluster and
        /// recording, for each other variant "B", whether the cluster supports B and whether it
        /// supports A together with B.
        /// </summary>
        /// <param name="variantGroup">All variant sites under consideration, including A.</param>
        /// <param name="clusters">Cluster set providing the clusters, their count and relative weights.</param>
        /// <param name="variantSiteA">The "A" variant; every other member of the group is treated as a "B".</param>
        /// <returns>The populated VariantPhasingResult.</returns>
        private static VariantPhasingResult GetPhasingProbabilitiesForVariant(List <VariantSite> variantGroup, SetOfClusters clusters, VariantSite variantSiteA)
        {
            var otherVariants = variantGroup.Where(candidate => candidate != variantSiteA).ToList();
            var phasingResult = new VariantPhasingResult(variantSiteA, otherVariants, clusters.NumClusters);
            var relativeWeights = clusters.GetRelativeWeights();

            // Per cluster: does it contain B, and does it contain both A and B?
            foreach (var cluster in clusters.Clusters)
            {
                var veadCounts    = cluster.GetVeadCountsInCluster(variantGroup);
                var clusterWeight = relativeWeights[cluster.Name];

                foreach (var variantSiteB in otherVariants)
                {
                    var supportForB = veadCounts[variantSiteB];
                    if (supportForB <= 0)
                    {
                        // This cluster says nothing about B.
                        continue;
                    }

                    phasingResult.AddSupportForB(variantSiteB, clusterWeight);

                    var supportForA = veadCounts[variantSiteA];
                    if (supportForA > 0)
                    {
                        phasingResult.AddSupportForAandB(variantSiteB, clusterWeight);
                    }
                }
            }

            return phasingResult;
        }