Esempio n. 1
0
        /// <summary>
        /// Clusters features based on their pairwise distances by finding the minimal spanning tree (MST) via Prim's algorithm.
        /// </summary>
        /// <param name="distances">Pairwise distances between all features in question.</param>
        /// <param name="clusters">Singleton clusters from each feature.</param>
        /// <returns>List of features clustered together.</returns>
        public override List <U> LinkFeatures(List <PairwiseDistance <T> > potentialDistances, Dictionary <int, U> clusters)
        {
            var newClusters = new List <U>();
            var distances   = new List <PairwiseDistance <T> >();

            // There is an edge case with this setup that a singleton outside of the range
            // of other features made it into the batch of edges, but there is no corresponding edge
            // to the rest of the graph(s).  So here we hash all features
            // then we ask for within the range, pare down that hash to a set of features that
            // have no corresponding edge.  These guys would ultimately be singletons we want
            // to capture...
            var clusterMap = new HashSet <T>();

            foreach (var cluster in clusters.Values)
            {
                foreach (var feature in cluster.Features)
                {
                    if (!clusterMap.Contains(feature))
                    {
                        clusterMap.Add(feature);
                    }
                }
            }


            foreach (var distance in potentialDistances)
            {
                if (AreClustersWithinTolerance(distance.FeatureX, distance.FeatureY))
                {
                    //distances.Add(distance);
                    if (clusterMap.Contains(distance.FeatureX))
                    {
                        clusterMap.Remove(distance.FeatureX);
                    }
                    if (clusterMap.Contains(distance.FeatureY))
                    {
                        clusterMap.Remove(distance.FeatureY);
                    }
                }
            }

            // Once we have removed any cluster
            foreach (var feature in clusterMap)
            {
                var cluster = new U();
                feature.SetParentFeature(cluster);
                cluster.AddChildFeature(feature);
                newClusters.Add(cluster);
            }

            var newDistances = (from element in potentialDistances
                                orderby element.Distance
                                select element).ToList();

            var queue = new Queue <Edge <T> >();
            var graph = new FeatureGraph <T>();

            // Sort out the distances so we dont have to recalculate distances.
            var id    = 0;
            var edges = new List <Edge <T> >();

            newDistances.ForEach(x => edges.Add(new Edge <T>(id++,
                                                             x.Distance,
                                                             x.FeatureX,
                                                             x.FeatureY)));
            graph.CreateGraph(edges);
            edges.ForEach(x => queue.Enqueue(x));

            // This makes sure we have
            var seenEdge = new HashSet <int>();


            // Now we start at the MST building
            if (DumpLinearRelationship)
            {
                Console.WriteLine("GraphEdgeLength");
            }
            while (queue.Count > 0)
            {
                var startEdge = queue.Dequeue();

                // If we have already seen the edge, ignore it...
                if (seenEdge.Contains(startEdge.ID))
                {
                    continue;
                }

                var mstGroup = ConstructSubTree(graph,
                                                seenEdge,
                                                startEdge);

                var clusterTree = new MstLrTree <Edge <T> >();

                // Get the mst value .
                double sum  = 0;
                double mean = 0;
                foreach (var dist in mstGroup.LinearRelationship)
                {
                    seenEdge.Add(dist.ID);
                    sum += dist.Length;

                    clusterTree.Insert(dist);

                    var ppmDist = FeatureLight.ComputeMassPPMDifference(dist.VertexB.MassMonoisotopicAligned,
                                                                        dist.VertexA.MassMonoisotopicAligned);

                    if (DumpLinearRelationship)
                    {
                        Console.WriteLine("{0}", dist.Length); /*,,{1},{2},{3},{4},{5},{6},{7},{8}", dist.Length,
                                                                *         dist.VertexA.NetAligned,
                                                                *         dist.VertexA.MassMonoisotopicAligned,
                                                                *         dist.VertexA.DriftTime,
                                                                *         dist.VertexB.NetAligned,
                                                                *         dist.VertexB.MassMonoisotopicAligned,
                                                                *         dist.VertexB.DriftTime,
                                                                *         ppmDist,
                                                                *         Math.Abs(dist.VertexA.NetAligned - dist.VertexB.NetAligned));
                                                                */
                    }
                }

                var N = Convert.ToDouble(mstGroup.LinearRelationship.Count);

                // Calculate the standard deviation.
                mean = sum / N;
                sum  = 0;
                foreach (var dist in mstGroup.LinearRelationship)
                {
                    var diff = dist.Length - mean;
                    sum += (diff * diff);
                }

                var stdev  = Math.Sqrt(sum / N);
                var cutoff = NSigma; // *stdev; // stdev* NSigma;

                var mstClusters = CreateClusters(mstGroup, cutoff);
                newClusters.AddRange(mstClusters);
            }

            return(newClusters);
        }
Esempio n. 2
0
        /// <summary>
        /// constructs a sub-tree
        /// </summary>
        /// <param name="graph"></param>
        /// <param name="visitedEdges"></param>
        /// <param name="startEdge"></param>
        /// <returns></returns>
        private MinimumSpanningTree <T> ConstructSubTree(FeatureGraph <T> graph,
                                                         HashSet <int> visitedEdges,
                                                         Edge <T> startEdge)
        {
            // Manages the tree being constructed.
            var tree = new MinimumSpanningTree <T>();

            // Manages the list of candidate edges
            var tempEdges = new UniqueEdgeList <T>();

            // Seed of the breadth first search (BFS)
            tempEdges.AddEdge(startEdge);

            // Start BFS
            while (tempEdges.Count > 0)
            {
                // Sort the edges based on distace.
                tempEdges.Sort();

                Edge <T> shortestEdge  = null;
                var      edgesToRemove = new List <Edge <T> >();

                // Find the shortest edge...
                foreach (var edge in tempEdges.Edges)
                {
                    var edgeSeen   = tree.HasEdgeBeenSeen(edge);
                    var vertexSeen = tree.HasEdgeVerticesBeenSeen(edge);

                    // Make sure that we havent seen this edge.
                    if (edgeSeen)
                    {
                        continue;
                    }

                    if (vertexSeen)
                    {
                        visitedEdges.Add(edge.ID);
                        edgesToRemove.Add(edge);
                        continue;
                    }

                    shortestEdge = edge;
                    tree.AddEdge(shortestEdge);
                    break;
                }

                // Remove any edges that have been used up..
                edgesToRemove.ForEach(x => tempEdges.RemoveEdge(x));
                edgesToRemove.ForEach(x => graph.RemoveEdge(x));

                // We didnt find an edge, so we have nothing else to connect...
                if (shortestEdge == null)
                {
                    // Make sure that we assert that there are no edges left...should be the case here!
                    System.Diagnostics.Debug.Assert(tempEdges.Count == 0);
                    break;
                }

                visitedEdges.Add(shortestEdge.ID);

                // Removes the shortest edge from the graph...
                graph.RemoveEdge(shortestEdge);

                var adjacentEdges = graph.GetAdjacentEdgesFromEdgeVertices(shortestEdge);
                //adjacentEdges.Sort();

                tempEdges.AddEdges(adjacentEdges.Edges);

                // Remove the shortest edge from the list of available edges left...
                tempEdges.RemoveEdge(shortestEdge);
            }

            return(tree);
        }