예제 #1
0
        /// <summary>
        /// Merges pairs of clusters in <paramref name="clusterSet"/> until no remaining pair
        /// passes <c>TestCanBeMerged</c>.
        /// </summary>
        /// <param name="clusterSet">Set whose clusters are merged in place.</param>
        /// <param name="maxNumDisagreements">Threshold forwarded to <c>TestCanBeMerged</c>.</param>
        public static void TryMergeAllClusters(SetOfClusters clusterSet, int maxNumDisagreements)
        {
            // Each merge removes two clusters and adds one, so n clusters allow at most n - 1
            // merges. Bounding the loop this way guarantees termination regardless of how the
            // set implements removal.
            var maxMerges = clusterSet.Clusters.Count() - 1;

            for (var mergesDone = 0; mergesDone < maxMerges; mergesDone++)
            {
                if (!MergeFirstCompatiblePair(clusterSet, maxNumDisagreements))
                {
                    break;
                }
            }
        }

        /// <summary>
        /// Scans the current clusters for the first pair that can be merged and merges it.
        /// </summary>
        /// <returns>True if a pair was merged; false if no pair could be merged.</returns>
        private static bool MergeFirstCompatiblePair(SetOfClusters clusterSet, int maxNumDisagreements)
        {
            // Re-read the clusters on every scan: a list captured before a merge would still
            // hold the two removed clusters and would not hold the merged one, so continuing
            // to iterate it could merge an already-removed cluster a second time.
            var clusters    = clusterSet.Clusters;
            var numClusters = clusters.Count();

            for (var i = 0; i < numClusters; i++)
            {
                for (var j = i + 1; j < numClusters; j++)
                {
                    var clusterA = clusters[i];
                    var clusterB = clusters[j];

                    if (!TestCanBeMerged(clusterA, clusterB, maxNumDisagreements))
                    {
                        continue;
                    }

                    clusterSet.RemoveCluster(clusterA.Name);
                    clusterSet.RemoveCluster(clusterB.Name);

                    var mergedCluster = MergeClusters(clusterA, clusterB);
                    clusterSet.AddCluster(mergedCluster);

                    // The set has changed; the caller rescans from a fresh list.
                    return(true);
                }
            }

            return(false);
        }
        /// <summary>
        /// Builds a <see cref="SetOfClusters"/> whose clustering parameters have both
        /// thresholds set to zero, and asserts that it starts out without any clusters.
        /// </summary>
        /// <returns>The newly created set of clusters.</returns>
        private SetOfClusters CreateDefaultSetOfClusters()
        {
            var emptySet = new SetOfClusters(new ClusteringParameters
            {
                MaxNumberDisagreements = 0,
                MinNumberAgreements    = 0
            });

            // A freshly constructed set must not contain any clusters.
            Assert.Equal(0, emptySet.Clusters.Count());

            return emptySet;
        }
예제 #3
0
        /// <summary>
        /// Checks that <c>VariantPhasingResult.GetPhasingProbabilities</c> returns exactly one
        /// entry per variant site, keyed by the sites in their input order, when the set of
        /// clusters is newly constructed.
        /// </summary>
        public void GetPhasingProbabilities()
        {
            var sites = new List <VariantSite>();
            sites.Add(new VariantSite(1));
            sites.Add(new VariantSite(2));
            sites.Add(new VariantSite(45));

            var clusterSet = new SetOfClusters(new ClusteringParameters());

            var resultsBySite = VariantPhasingResult.GetPhasingProbabilities(sites, clusterSet);

            // One result per input site...
            Assert.Equal(sites.Count, resultsBySite.Count);

            // ...and the keys are the input sites themselves, in the same order.
            var expectedKeys = sites.Select(site => site);
            var actualKeys   = resultsBySite.Keys.ToList();
            Assert.Equal(expectedKeys, actualKeys);
        }
예제 #4
0
        /// <summary>
        /// Tries to merge every pair of candidate clusters and returns the cluster that should
        /// receive <paramref name="testVeadGroup"/>.
        /// </summary>
        /// <param name="clusters">Set that owns the candidates; merged clusters replace their sources in it.</param>
        /// <param name="maxNumDisagreements">Threshold forwarded to <c>TestCanBeMerged</c>.</param>
        /// <param name="bestCandidates">Candidate clusters; must contain at least one element. Not modified.</param>
        /// <param name="testVeadGroup">Vead group forwarded to <c>TestCanBeMerged</c>.</param>
        /// <returns>
        /// Starts as the first candidate. Every successful merge replaces it with the merged
        /// cluster; every failed merge test replaces it with the second cluster of the pair
        /// when that cluster has strictly more veads than the first.
        /// </returns>
        public static Cluster MergeAllBestCandidates(SetOfClusters clusters, int maxNumDisagreements,
                                                     List <Cluster> bestCandidates, VeadGroup testVeadGroup)
        {
            var numCandidates = bestCandidates.Count;
            var bestcluster   = bestCandidates[0];

            // Work on a private copy so that a merged cluster can take the place of its
            // sources. Comparing against the original list after a merge would re-test (and
            // possibly re-merge or return) clusters that were already removed from the set.
            var candidates = new List <Cluster>(bestCandidates);
            var consumed   = new bool[numCandidates];

            for (var i = 0; i < numCandidates; i++)
            {
                if (consumed[i])
                {
                    continue; // Already folded into an earlier candidate.
                }

                for (var j = i + 1; j < numCandidates; j++)
                {
                    if (consumed[j])
                    {
                        continue;
                    }

                    // First test if the clusters can be merged. If they can, merge them.
                    // If they can't, prefer the larger of the two.
                    var clusterA = candidates[i];
                    var clusterB = candidates[j];

                    var canBeMerged = TestCanBeMerged(clusterA, clusterB, maxNumDisagreements, testVeadGroup);

                    if (canBeMerged)
                    {
                        clusters.RemoveCluster(clusterA.Name);
                        clusters.RemoveCluster(clusterB.Name);

                        var mergedCluster = MergeClusters(clusterA, clusterB);
                        clusters.AddCluster(mergedCluster);

                        // The merged cluster takes over slot i; slot j no longer exists.
                        candidates[i] = mergedCluster;
                        consumed[j]   = true;
                        bestcluster   = mergedCluster;
                    }
                    else if (clusterB.NumVeads > clusterA.NumVeads)
                    {
                        bestcluster = clusterB;
                    }
                    // Otherwise keep the current bestcluster.
                }
            }
            return(bestcluster);
        }
        /// <summary>
        /// Prunes the lowest-ranked clusters until the number of clusters no longer exceeds
        /// <c>_options.ClusterConstraint</c>, logging each step. Stops early when
        /// <c>RemoveWorstClusters</c> reports more worst clusters than may be removed.
        /// </summary>
        /// <param name="clusters">Set of clusters to prune in place.</param>
        private void MeetPloidyConstraints(SetOfClusters clusters)
        {
            while (clusters.NumClusters > _options.ClusterConstraint)
            {
                Logger.WriteToLog("Num clusters: " + clusters.Clusters.Length);
                Logger.WriteToLog("Num cluster constraint " + _options.ClusterConstraint + " is violated.  Pruning clusters...");

                int excessClusters = clusters.NumClusters - _options.ClusterConstraint;
                int worstCount     = clusters.RemoveWorstClusters(excessClusters);

                if (worstCount > excessClusters)
                {
                    // A tie among the worst clusters: there is no way to pick which ones to
                    // drop, so the constraint is left violated.
                    Logger.WriteToLog(worstCount + " low ranked clusters found. This is not resolveable with our cluster constraints.");
                    break;
                }

                Logger.WriteToLog(worstCount + " clusters pruned.");
            }

            Logger.WriteToLog("Clusters finalized: " + clusters.Clusters.Length);
        }
예제 #6
0
        /// <summary>
        /// Checks that <c>ClusterMerger.MergeAllBestCandidates</c> collapses two clusters
        /// in the set into a single cluster.
        /// </summary>
        public void MergeAllBestCandidates()
        {
            var clusterSet = new SetOfClusters(new ClusteringParameters());

            var firstCluster = new Cluster("test", ClusterTestHelpers.GetSampleVeadGroups(prefix: "Original"));
            clusterSet.AddCluster(firstCluster);

            var secondCluster = new Cluster("test2", ClusterTestHelpers.GetSampleVeadGroups(prefix: "Second"));
            clusterSet.AddCluster(secondCluster);

            // Both clusters are present before merging.
            Assert.Equal(2, clusterSet.NumClusters);

            var probeGroup = ClusterTestHelpers.GetSampleVeadGroups(1, 1, prefix: "Tester").First();
            var candidates = new List <Cluster> { firstCluster, secondCluster };

            // The returned cluster is not inspected; only the effect on the set is checked.
            ClusterMerger.MergeAllBestCandidates(clusterSet, 0, candidates, probeGroup);

            // The two clusters were merged, leaving exactly one.
            Assert.Equal(1, clusterSet.NumClusters);
        }
예제 #7
0
        /// <summary>
        /// Computes a <see cref="VariantPhasingResult"/> for every variant site in the list.
        /// </summary>
        /// <param name="variantSites">Variant sites to evaluate; each one becomes a key of the result.</param>
        /// <param name="clusters">Clusters passed through to the per-variant computation.</param>
        /// <returns>A dictionary mapping each variant site to its phasing result.</returns>
        public static Dictionary <VariantSite, VariantPhasingResult> GetPhasingProbabilities(List <VariantSite> variantSites, SetOfClusters clusters)
        {
            var resultsBySite = new Dictionary <VariantSite, VariantPhasingResult>();

            foreach (var site in variantSites)
            {
                // Each per-variant call receives its own copy of the full site list.
                var siteGroup  = variantSites.ToList();
                var siteResult = GetPhasingProbabilitiesForVariant(siteGroup, clusters, site);

                resultsBySite.Add(site, siteResult);
            }

            //TODO debug Clusters: (VariantGroup, Chr, Pos, Ref, Alt); Neighbors (VariantGroup, Chr, Pos, Ref, Alt, ProbOfAGivenB, ProbOfB, ProbOfAAndB)

            return resultsBySite;
        }
예제 #8
0
        /// <summary>
        /// Accumulates, over all clusters, the weighted support for each other variant site B
        /// and for the co-occurrence of <paramref name="variantSiteA"/> with B.
        /// </summary>
        /// <param name="variantGroup">All variant sites under consideration, including <paramref name="variantSiteA"/>.</param>
        /// <param name="clusters">Clusters supplying vead counts and relative weights.</param>
        /// <param name="variantSiteA">The variant site the result is computed for.</param>
        /// <returns>The phasing result populated with the accumulated support.</returns>
        private static VariantPhasingResult GetPhasingProbabilitiesForVariant(List <VariantSite> variantGroup, SetOfClusters clusters, VariantSite variantSiteA)
        {
            var neighbours = variantGroup.Where(site => site != variantSiteA).ToList();
            var result     = new VariantPhasingResult(variantSiteA, neighbours, clusters.NumClusters);
            var weightByClusterName = clusters.GetRelativeWeights();

            foreach (var cluster in clusters.Clusters)
            {
                var veadCounts    = cluster.GetVeadCountsInCluster(variantGroup);
                var clusterWeight = weightByClusterName[cluster.Name];

                foreach (var siteB in neighbours)
                {
                    // A cluster without support for B contributes nothing for this pair.
                    if (veadCounts[siteB] <= 0)
                    {
                        continue;
                    }

                    // The cluster contains B.
                    result.AddSupportForB(siteB, clusterWeight);

                    // The cluster contains both A and B.
                    if (veadCounts[variantSiteA] > 0)
                    {
                        result.AddSupportForAandB(siteB, clusterWeight);
                    }
                }
            }

            return result;
        }
        /// <summary>
        /// Partitions the given vead groups into a <see cref="SetOfClusters"/>: repeatedly seeds
        /// a new cluster from the first unassigned vead group, then tries to allocate the
        /// remaining vead groups to existing clusters, and finally applies the cluster-count
        /// constraint when one is configured.
        /// </summary>
        /// <param name="veadGroups">
        /// Vead groups to cluster. The list is sorted in place and its first element is removed
        /// by the first call to <c>CreateNewCluster</c>, so the caller's list is modified.
        /// </param>
        /// <param name="nbhdID">Neighborhood identifier; currently referenced only by a commented-out log statement.</param>
        /// <returns>
        /// The resulting set of clusters (empty if <paramref name="veadGroups"/> is empty). Vead
        /// groups may be left unassigned if the cap on the number of clusters is exceeded.
        /// </returns>
        /// <remarks>Any exception is logged and rethrown.</remarks>
        public SetOfClusters ClusterVeadGroups(List <VeadGroup> veadGroups, string nbhdID)
        {
            try
            {
                // Make the meatiest clusters first.
                // (Ordering is defined by VeadGroup's own comparison, which is not visible here.)
                veadGroups.Sort();
                var clusters = new SetOfClusters(_options);
                if (veadGroups.Count == 0)
                {
                    Logger.WriteToLog("No vead groups given to clustering algorithm.");
                    return(clusters);
                }

                // Cap on the number of clusters: sites in the first vead group times the per-site limit.
                var maxNumNewClusters = veadGroups[0].SiteResults.Length * _options.MaxNumNewClustersPerSite;

                // First and last site positions of the first vead group, used for logging only.
                var nbhdStart = veadGroups[0].SiteResults[0].VcfReferencePosition;
                var nbhdEnd   = veadGroups[0].SiteResults[veadGroups[0].SiteResults.Length - 1].VcfReferencePosition;

                Logger.WriteToLog("Maximum num new clusters for this nbhd is " + maxNumNewClusters);
                Logger.WriteToLog("There are {0} variant sites in this nbhd, from position {1} to {2}", veadGroups[0].SiteResults.Length, nbhdStart, nbhdEnd);

                if (_debug)
                {
                    Logger.WriteToLog("variant-compressed read groups as follows:  ");
                    // NOTE(review): ToPositions() is passed as a second argument but the message has
                    // no format placeholder; presumably it was meant to be appended to the header.
                    // Confirm against the Logger.WriteToLog overloads.
                    Logger.WriteToLog("count" + "\t", veadGroups[0].ToPositions());
                    foreach (var vG in veadGroups)
                    {
                        Logger.WriteToLog("\t" + vG.NumVeads + "\t" + vG);
                    }
                }

                // Only used by the commented-out iteration logging below.
                int outerIteration = 0;
                while (veadGroups.Count > 0)
                {
                    //Logger.WriteToLog("ITER: {0}.{1}\tNum clusters: {2} \tUnassigned read groups: {3}", outerIteration, 0, clusters.NumClusters, veadGroups.Count);
                    // Seed a new cluster from the first remaining vead group (and remove it from the list).
                    CreateNewCluster(veadGroups, clusters);

                    // Is there any poor-fitting read that should go somewhere else?
                    if (_options.AllowWorstFitRemoval)
                    {
                        clusters.ReAssignWorstFit();
                    }

                    //TODO log the contents of clusterSet (previously "PrintContents()").
                    const int maxNumReallocationIterations = 10;
                    var       reallocationIterationNumber  = 1;

                    // Keep trying to allocate free agents to clusters if possible, then tell how many we have left free.
                    // If we haven't allocated any in the last round, give up and break off a new cluster.
                    while (veadGroups.Count > 0)
                    {
                        var initialReadsLeft = veadGroups.Count;
                        //Logger.WriteToLog("ITER: {0}.{1}\tNum clusters: {2} \tUnassigned read groups: {3}", outerIteration, reallocationIterationNumber, clusters.NumClusters, initialReadsLeft);

                        // Find the best existing cluster for each free agent. Add the free agent to that cluster if it exists. Return anyone left free.
                        // From here on veadGroups refers to a new list, not the caller's.
                        veadGroups = AllocateReadsToClusters(veadGroups, clusters,
                                                             _options.MaxNumberDisagreements);

                        //TODO log the contents of clusterSet (previously "PrintContents()").

                        if (veadGroups.Count == initialReadsLeft)
                        {
                            break; // Reallocation didn't do any good. Give up.
                        }
                        reallocationIterationNumber++;
                        if (reallocationIterationNumber > maxNumReallocationIterations)
                        {
                            break; // Reallocation round limit reached.
                        }
                    }

                    // Stop creating clusters once the cap is exceeded; any vead groups still
                    // in the list at this point stay unassigned.
                    if (clusters.NumClusters > maxNumNewClusters)
                    {
                        break;
                    }

                    outerIteration++;
                }

                //Logger.WriteToLog("ITER: {0}.{1}\tNum clusters: {2} \tUnassigned read groups: {3}", outerIteration, 0, clusters.NumClusters, veadGroups.Count);

                // NOTE(review): clusters is assigned from `new` above and never reassigned, so
                // this check appears to be always true.
                if (clusters != null)
                {
                    //Logger.WriteToLog("Found " + clusters.Clusters.Length + " clusters in Nbhd " + nbhdID);


                    // A positive ClusterConstraint enables pruning down to that many clusters.
                    if (_options.ClusterConstraint > 0)
                    {
                        MeetPloidyConstraints(clusters);
                    }
                }

                return(clusters);
            }
            catch (Exception ex)
            {
                // Log for context, then rethrow preserving the original stack trace.
                Logger.WriteToLog("Clustering issue.", ex);
                throw;
            }
        }
        /// <summary>
        /// Assigns each vead group to its best-fitting existing cluster, optionally merging
        /// the best candidate clusters first, and collects the vead groups that fit no cluster.
        /// </summary>
        /// <param name="veadGroups">Vead groups still waiting for a cluster.</param>
        /// <param name="clusters">Existing clusters to allocate into.</param>
        /// <param name="maxNumDisagreements">Threshold forwarded to the cluster merger.</param>
        /// <returns>A new list holding the vead groups that could not be allocated.</returns>
        private List <VeadGroup> AllocateReadsToClusters(List <VeadGroup> veadGroups, SetOfClusters clusters, int maxNumDisagreements)
        {
            var unallocated = new List <VeadGroup>();

            foreach (var veadGroup in veadGroups)
            {
                var fits = clusters.GetClusterFits(veadGroup);

                // No cluster fits: leave this vead group for a later round.
                if (fits.Count == 0)
                {
                    unallocated.Add(veadGroup);
                    continue;
                }

                // The last entry of the fits holds the best candidates.
                var topCandidates = fits.Last().Value;
                var target        = topCandidates[0];

                if (_options.AllowClusterMerging)
                {
                    target = ClusterMerger.MergeAllBestCandidates(clusters, maxNumDisagreements, topCandidates, veadGroup);
                }

                target.Add(veadGroup);
            }

            return unallocated;
        }
 /// <summary>
 /// Seeds a new cluster in <paramref name="clusters"/> from the first vead group of
 /// <paramref name="vgs"/> and removes that vead group from the list.
 /// </summary>
 /// <param name="vgs">Unassigned vead groups; must contain at least one element.</param>
 /// <param name="clusters">Set of clusters that receives the new cluster.</param>
 private void CreateNewCluster(List <VeadGroup> vgs, SetOfClusters clusters)
 {
     clusters.CreateAndAddCluster(vgs[0]);

     // Drop the head by index rather than by value: Remove(vgs[0]) performs an
     // equality search just to find the element that is already known to be at index 0.
     vgs.RemoveAt(0);
 }