//*************************************************************************
//  Constructor: CommunityPair()
//
/// <summary>
/// Creates a <see cref="CommunityPair" /> whose two communities are not yet
/// assigned and whose delta Q holds the "not set" value.
/// </summary>
//*************************************************************************

public CommunityPair()
{
    // Neither end of the pair is known yet.

    m_oCommunity1 = m_oCommunity2 = null;

    // Start from the Community.DeltaQNotSet value until a real delta Q is
    // assigned.

    m_fDeltaQ = Community.DeltaQNotSet;

    AssertValid();
}
OnMergedCommunities
(
    Community mergedCommunity1,
    Community mergedCommunity2,
    Community newMergedCommunity,
    Single newCommunityPairDeltaQ,
    DeltaQMaxHeap deltaQMaxHeap
)
{
    // Gets called when mergedCommunity1 and mergedCommunity2 have been
    // merged into newMergedCommunity and one or both of them were
    // connected to this community.
    //
    // The community pair (or pairs) that connected this community to the
    // merged communities is removed, a single pair that connects to
    // newMergedCommunity with a delta Q of newCommunityPairDeltaQ is added,
    // the pair with the maximum delta Q is updated, and deltaQMaxHeap is
    // updated if this community's maximum delta Q changed.

    Debug.Assert(mergedCommunity1 != null);
    Debug.Assert(mergedCommunity2 != null);
    Debug.Assert(newMergedCommunity != null);
    Debug.Assert(deltaQMaxHeap != null);
    AssertValid();

    Int32 iMergedCommunity1ID = mergedCommunity1.ID;
    Int32 iMergedCommunity2ID = mergedCommunity2.ID;

    Debug.Assert(iMergedCommunity1ID != iMergedCommunity2ID);

    // If only one of the two merged communities was connected to this
    // community, fPreviousCommunityPairDeltaQ is the delta Q of the pair
    // that connected to it.  If both were connected, it is the larger of
    // the two delta Q values.

    Single fPreviousCommunityPairDeltaQ = Single.MinValue;

    // The community pairs are keyed by the other community's ID, so each
    // merged community can be looked up directly by key instead of walking
    // the whole list.

    Int32 iIndex = m_oCommunityPairs.IndexOfKey(iMergedCommunity1ID);

    if (iIndex >= 0)
    {
        fPreviousCommunityPairDeltaQ =
            m_oCommunityPairs.Values[iIndex].DeltaQ;

        m_oCommunityPairs.RemoveAt(iIndex);
    }

    iIndex = m_oCommunityPairs.IndexOfKey(iMergedCommunity2ID);

    if (iIndex >= 0)
    {
        fPreviousCommunityPairDeltaQ = Math.Max(
            fPreviousCommunityPairDeltaQ,
            m_oCommunityPairs.Values[iIndex].DeltaQ);

        m_oCommunityPairs.RemoveAt(iIndex);
    }

    // At least one of the merged communities must have been connected to
    // this community.

    Debug.Assert(fPreviousCommunityPairDeltaQ != Single.MinValue);

    // Add a new community pair that connects to the new merged community.

    CommunityPair oNewCommunityPair = new CommunityPair();
    oNewCommunityPair.Community1 = this;
    oNewCommunityPair.Community2 = newMergedCommunity;
    oNewCommunityPair.DeltaQ = newCommunityPairDeltaQ;

    m_oCommunityPairs.Add(newMergedCommunity.ID, oNewCommunityPair);

    // Update m_oCommunityPairWithMaximumDeltaQ if necessary.  These rules
    // come from section 4.1 of "Finding Community Structure in Mega-scale
    // Social Networks," by Ken Wakita and Toshiyuki Tsurumi.

    Single fOldMaximumDeltaQ = this.MaximumDeltaQ;

    if (fPreviousCommunityPairDeltaQ < fOldMaximumDeltaQ)
    {
        // The deleted community pair (or pairs) was not the one with the
        // maximum delta Q, so the maximum changes only if the new pair
        // beats it.  (A new delta Q that exceeds the old maximum
        // necessarily exceeds the deleted delta Q as well.)

        if (newCommunityPairDeltaQ > fOldMaximumDeltaQ)
        {
            m_oCommunityPairWithMaximumDeltaQ = oNewCommunityPair;
        }
    }
    else if (newCommunityPairDeltaQ >= fPreviousCommunityPairDeltaQ)
    {
        // The deleted community pair (or pairs) was the one with the
        // maximum delta Q, and the new pair is at least as large, so the
        // new pair is now the one with the maximum delta Q.

        m_oCommunityPairWithMaximumDeltaQ = oNewCommunityPair;
    }
    else
    {
        // Worst case: The maximum was deleted and the new pair is smaller,
        // so all community pairs must be scanned.

        Single fNewMaximumDeltaQ = Single.MinValue;

        foreach (CommunityPair oCommunityPair in m_oCommunityPairs.Values)
        {
            if (oCommunityPair.DeltaQ > fNewMaximumDeltaQ)
            {
                m_oCommunityPairWithMaximumDeltaQ = oCommunityPair;
                fNewMaximumDeltaQ = oCommunityPair.DeltaQ;
            }
        }
    }

    // Exact comparison is intended here: it detects whether the stored
    // maximum changed at all.

    if (fOldMaximumDeltaQ != this.MaximumDeltaQ)
    {
        // Update the max heap.

        deltaQMaxHeap.UpdateValue(this, this.MaximumDeltaQ);
    }
}
//*************************************************************************
//  Method: OnMergedCommunities()
//
/// <summary>
/// Gets called when two communities merge and one or both of them were
/// connected to this community.
/// </summary>
///
/// <param name="mergedCommunity1">
/// The first community that was merged.
/// </param>
///
/// <param name="mergedCommunity2">
/// The second community that was merged.
/// </param>
///
/// <param name="newMergedCommunity">
/// The new community that the communities were merged into.
/// </param>
///
/// <param name="newCommunityPairDeltaQ">
/// The new delta Q value for the community pair that connects this
/// community to the merged community.
/// </param>
///
/// <param name="deltaQMaxHeap">
/// Max heap, used to keep track of the maximum delta Q value in each
/// community.  There is an element in the max heap for each community.
/// The key is the Community and the value is the Community's maximum
/// delta Q.
/// </param>
///
/// <remarks>
/// The community pair (or pairs) that connected this community to the
/// merged communities is removed and replaced with a single pair that
/// connects to <paramref name="newMergedCommunity" />.  The max heap is
/// updated only if this community's maximum delta Q changed.
/// </remarks>
//*************************************************************************

public void
OnMergedCommunities
(
    Community mergedCommunity1,
    Community mergedCommunity2,
    Community newMergedCommunity,
    Single newCommunityPairDeltaQ,
    DeltaQMaxHeap deltaQMaxHeap
)
{
    Debug.Assert(mergedCommunity1 != null);
    Debug.Assert(mergedCommunity2 != null);
    Debug.Assert(newMergedCommunity != null);
    Debug.Assert(deltaQMaxHeap != null);
    AssertValid();

    Int32 iMergedCommunity1ID = mergedCommunity1.ID;
    Int32 iMergedCommunity2ID = mergedCommunity2.ID;

    Debug.Assert(iMergedCommunity1ID != iMergedCommunity2ID);

    // If only one of the two merged communities was connected to this
    // community, fPreviousCommunityPairDeltaQ is the delta Q of the pair
    // that connected to it.  If both were connected, it is the larger of
    // the two delta Q values.

    Single fPreviousCommunityPairDeltaQ = Single.MinValue;

    // The community pairs are keyed by the other community's ID, so each
    // merged community can be looked up directly by key instead of walking
    // the whole list.

    Int32 iIndex = m_oCommunityPairs.IndexOfKey(iMergedCommunity1ID);

    if (iIndex >= 0)
    {
        fPreviousCommunityPairDeltaQ =
            m_oCommunityPairs.Values[iIndex].DeltaQ;

        m_oCommunityPairs.RemoveAt(iIndex);
    }

    iIndex = m_oCommunityPairs.IndexOfKey(iMergedCommunity2ID);

    if (iIndex >= 0)
    {
        fPreviousCommunityPairDeltaQ = Math.Max(
            fPreviousCommunityPairDeltaQ,
            m_oCommunityPairs.Values[iIndex].DeltaQ);

        m_oCommunityPairs.RemoveAt(iIndex);
    }

    // At least one of the merged communities must have been connected to
    // this community.

    Debug.Assert(fPreviousCommunityPairDeltaQ != Single.MinValue);

    // Add a new community pair that connects to the new merged community.

    CommunityPair oNewCommunityPair = new CommunityPair();
    oNewCommunityPair.Community1 = this;
    oNewCommunityPair.Community2 = newMergedCommunity;
    oNewCommunityPair.DeltaQ = newCommunityPairDeltaQ;

    m_oCommunityPairs.Add(newMergedCommunity.ID, oNewCommunityPair);

    // Update m_oCommunityPairWithMaximumDeltaQ if necessary.  These rules
    // come from section 4.1 of "Finding Community Structure in Mega-scale
    // Social Networks," by Ken Wakita and Toshiyuki Tsurumi.

    Single fOldMaximumDeltaQ = this.MaximumDeltaQ;

    if (fPreviousCommunityPairDeltaQ < fOldMaximumDeltaQ)
    {
        // The deleted community pair (or pairs) was not the one with the
        // maximum delta Q, so the maximum changes only if the new pair
        // beats it.  (A new delta Q that exceeds the old maximum
        // necessarily exceeds the deleted delta Q as well.)

        if (newCommunityPairDeltaQ > fOldMaximumDeltaQ)
        {
            m_oCommunityPairWithMaximumDeltaQ = oNewCommunityPair;
        }
    }
    else if (newCommunityPairDeltaQ >= fPreviousCommunityPairDeltaQ)
    {
        // The deleted community pair (or pairs) was the one with the
        // maximum delta Q, and the new pair is at least as large, so the
        // new pair is now the one with the maximum delta Q.

        m_oCommunityPairWithMaximumDeltaQ = oNewCommunityPair;
    }
    else
    {
        // Worst case: The maximum was deleted and the new pair is smaller,
        // so all community pairs must be scanned.

        Single fNewMaximumDeltaQ = Single.MinValue;

        foreach (CommunityPair oCommunityPair in m_oCommunityPairs.Values)
        {
            if (oCommunityPair.DeltaQ > fNewMaximumDeltaQ)
            {
                m_oCommunityPairWithMaximumDeltaQ = oCommunityPair;
                fNewMaximumDeltaQ = oCommunityPair.DeltaQ;
            }
        }
    }

    // Exact comparison is intended here: it detects whether the stored
    // maximum changed at all.

    if (fOldMaximumDeltaQ != this.MaximumDeltaQ)
    {
        // Update the max heap.

        deltaQMaxHeap.UpdateValue(this, this.MaximumDeltaQ);
    }
}
MergeCommunities
(
    LinkedList<Community> oCommunities,
    CommunityPair oCommunityPairToMerge,
    DeltaQMaxHeap oDeltaQMaxHeap,
    Int32 iEdgesInGraph,
    IDGenerator oIDGenerator
)
{
    // Merges the two communities in oCommunityPairToMerge into a new
    // community.
    //
    // The new community gets the next ID from oIDGenerator, the vertices of
    // both merged communities and the sum of their degrees.  A community
    // pair is created for every other community that was connected to
    // either merged community, and each of those communities is notified
    // via OnMergedCommunities().  Finally, the merged communities are
    // replaced with the new community in both oCommunities and
    // oDeltaQMaxHeap.

    Debug.Assert(oCommunityPairToMerge != null);
    Debug.Assert(oCommunities != null);
    Debug.Assert(oDeltaQMaxHeap != null);
    Debug.Assert(iEdgesInGraph > 0);
    Debug.Assert(oIDGenerator != null);

    // Merge Community1 and Community2 into a NewCommunity.

    Community oCommunity1 = oCommunityPairToMerge.Community1;
    Community oCommunity2 = oCommunityPairToMerge.Community2;

    Community oNewCommunity = new Community();

    oNewCommunity.ID = oIDGenerator.GetNextID();
    oNewCommunity.Degree = oCommunity1.Degree + oCommunity2.Degree;

    ICollection<IVertex> oNewCommunityVertices = oNewCommunity.Vertices;

    foreach (IVertex oVertex in oCommunity1.Vertices)
    {
        oNewCommunityVertices.Add(oVertex);
    }

    foreach (IVertex oVertex in oCommunity2.Vertices)
    {
        oNewCommunityVertices.Add(oVertex);
    }

    // In the following sorted lists, the sort key is the ID of
    // CommunityPair.Community2 and the value is the CommunityPair.

    SortedList<Int32, CommunityPair> oCommunity1CommunityPairs =
        oCommunity1.CommunityPairs;

    SortedList<Int32, CommunityPair> oCommunity2CommunityPairs =
        oCommunity2.CommunityPairs;

    SortedList<Int32, CommunityPair> oNewCommunityCommunityPairs =
        oNewCommunity.CommunityPairs;

    Int32 iCommunity1CommunityPairs = oCommunity1CommunityPairs.Count;
    Int32 iCommunity2CommunityPairs = oCommunity2CommunityPairs.Count;

    IList<Int32> oCommunity1Keys = oCommunity1CommunityPairs.Keys;
    IList<CommunityPair> oCommunity1Values = oCommunity1CommunityPairs.Values;

    IList<Int32> oCommunity2Keys = oCommunity2CommunityPairs.Keys;
    IList<CommunityPair> oCommunity2Values = oCommunity2CommunityPairs.Values;

    // These values do not change while the community pairs are being
    // stepped through, so compute them once.  fAi and fAj are the "a"
    // terms of equations 10b and 10c for community 1 and community 2.

    Int32 iCommunity1ID = oCommunity1.ID;
    Int32 iCommunity2ID = oCommunity2.ID;

    Single fTwoTimesEdgesInGraph = 2F * iEdgesInGraph;
    Single fAi = oCommunity1.Degree / fTwoTimesEdgesInGraph;
    Single fAj = oCommunity2.Degree / fTwoTimesEdgesInGraph;

    // Step through the community pairs in oCommunity1 and oCommunity2 in
    // order of the other community's ID.  An exhausted list reports
    // Int32.MaxValue so that the remaining list is always the smaller one.

    Int32 iCommunity1Index = 0;
    Int32 iCommunity2Index = 0;

    Single fMaximumDeltaQ = Single.MinValue;
    CommunityPair oCommunityPairWithMaximumDeltaQ = null;

    while (iCommunity1Index < iCommunity1CommunityPairs ||
           iCommunity2Index < iCommunity2CommunityPairs)
    {
        Int32 iCommunity1OtherCommunityID =
            (iCommunity1Index < iCommunity1CommunityPairs) ?
            oCommunity1Keys[iCommunity1Index] : Int32.MaxValue;

        Int32 iCommunity2OtherCommunityID =
            (iCommunity2Index < iCommunity2CommunityPairs) ?
            oCommunity2Keys[iCommunity2Index] : Int32.MaxValue;

        if (iCommunity1OtherCommunityID == iCommunity2ID)
        {
            // This is an internal connection eliminated by the merge.
            // Skip it.

            iCommunity1Index++;
            continue;
        }

        if (iCommunity2OtherCommunityID == iCommunity1ID)
        {
            // This is an internal connection eliminated by the merge.
            // Skip it.

            iCommunity2Index++;
            continue;
        }

        Community oOtherCommunity;
        Single fNewCommunityPairDeltaQ;

        if (iCommunity1OtherCommunityID == iCommunity2OtherCommunityID)
        {
            // The other community is connected to both community 1 and
            // community 2.
            //
            // This is equation 10a from the paper "Finding Community
            // Structure in Very Large Networks," by Clauset, Newman, and
            // Moore.

            oOtherCommunity =
                oCommunity1Values[iCommunity1Index].Community2;

            fNewCommunityPairDeltaQ =
                oCommunity1Values[iCommunity1Index].DeltaQ +
                oCommunity2Values[iCommunity2Index].DeltaQ;

            iCommunity1Index++;
            iCommunity2Index++;
        }
        else if (iCommunity1OtherCommunityID < iCommunity2OtherCommunityID)
        {
            // The other community is connected only to community 1.
            //
            // This is equation 10b from the same paper.

            oOtherCommunity =
                oCommunity1Values[iCommunity1Index].Community2;

            Single fAk = oOtherCommunity.Degree / fTwoTimesEdgesInGraph;

            fNewCommunityPairDeltaQ =
                oCommunity1Values[iCommunity1Index].DeltaQ -
                2F * fAj * fAk;

            iCommunity1Index++;
        }
        else
        {
            // The other community is connected only to community 2.
            //
            // This is equation 10c from the same paper.

            oOtherCommunity =
                oCommunity2Values[iCommunity2Index].Community2;

            Single fAk = oOtherCommunity.Degree / fTwoTimesEdgesInGraph;

            fNewCommunityPairDeltaQ =
                oCommunity2Values[iCommunity2Index].DeltaQ -
                2F * fAi * fAk;

            iCommunity2Index++;
        }

        // Create the new community pair only now, so that nothing is
        // allocated for the internal connections skipped above.

        CommunityPair oNewCommunityPair = new CommunityPair();

        oNewCommunityPair.Community1 = oNewCommunity;
        oNewCommunityPair.Community2 = oOtherCommunity;
        oNewCommunityPair.DeltaQ = fNewCommunityPairDeltaQ;

        oNewCommunityCommunityPairs.Add(oOtherCommunity.ID,
            oNewCommunityPair);

        if (fNewCommunityPairDeltaQ > fMaximumDeltaQ)
        {
            fMaximumDeltaQ = fNewCommunityPairDeltaQ;
            oCommunityPairWithMaximumDeltaQ = oNewCommunityPair;
        }

        // The other community is connected to one or both of the merged
        // communities.  Update it.

        oOtherCommunity.OnMergedCommunities(
            oCommunity1, oCommunity2, oNewCommunity,
            fNewCommunityPairDeltaQ, oDeltaQMaxHeap);
    }

    // This is null if the new community is not connected to any other
    // community.

    oNewCommunity.CommunityPairWithMaximumDeltaQ =
        oCommunityPairWithMaximumDeltaQ;

    // Update the community list.

    oCommunities.Remove(oCommunity1);
    oCommunities.Remove(oCommunity2);
    oCommunities.AddLast(oNewCommunity);

    // Update the max heap.

    oDeltaQMaxHeap.Remove(oCommunity1);
    oDeltaQMaxHeap.Remove(oCommunity2);
    oDeltaQMaxHeap.Add(oNewCommunity, oNewCommunity.MaximumDeltaQ);
}
CreateCommunities
(
    IVertexCollection oVertices,
    IDGenerator oIDGenerator
)
{
    // Builds the initial list of communities: one community per vertex in
    // oVertices, each with an ID taken from oIDGenerator, plus a community
    // pair for every adjacent vertex other than the vertex itself.  The
    // list of communities is returned.

    Debug.Assert(oVertices != null);
    Debug.Assert(oIDGenerator != null);
    AssertValid();

    LinkedList<Community> oResult = new LinkedList<Community>();

    // Temporary lookup from IVertex.ID to the community that was created
    // for that vertex.

    Dictionary<Int32, Community> oCommunityByVertexID =
        new Dictionary<Int32, Community>(oVertices.Count);

    // Pass 1: Create a single-vertex community for each vertex.

    foreach (IVertex oGraphVertex in oVertices)
    {
        Community oSingleVertexCommunity =
            new Community { ID = oIDGenerator.GetNextID() };

        oSingleVertexCommunity.Vertices.Add(oGraphVertex);

        // TODO: IVertex.AdjacentVertices includes self-loops.  Should
        // self-loops be eliminated everywhere, including here and within
        // the graph's total edge count?  Not sure how self-loops are
        // affecting the algorithm used by this class...

        oSingleVertexCommunity.Degree = oGraphVertex.AdjacentVertices.Count;

        oResult.AddLast(oSingleVertexCommunity);
        oCommunityByVertexID.Add(oGraphVertex.ID, oSingleVertexCommunity);
    }

    // Pass 2: Give each community a community pair for each of its
    // vertex's neighbors.

    foreach (Community oOwnerCommunity in oResult)
    {
        Debug.Assert(oOwnerCommunity.Vertices.Count == 1);

        IVertex oOwnerVertex = oOwnerCommunity.Vertices.First();

        SortedList<Int32, CommunityPair> oOwnerCommunityPairs =
            oOwnerCommunity.CommunityPairs;

        foreach (IVertex oNeighborVertex in oOwnerVertex.AdjacentVertices)
        {
            // A vertex adjacent to itself is a self-loop, which does not
            // get a community pair.

            if (oNeighborVertex != oOwnerVertex)
            {
                Community oNeighborCommunity =
                    oCommunityByVertexID[oNeighborVertex.ID];

                CommunityPair oPair = new CommunityPair
                {
                    Community1 = oOwnerCommunity,
                    Community2 = oNeighborCommunity
                };

                oOwnerCommunityPairs.Add(oNeighborCommunity.ID, oPair);
            }
        }
    }

    return oResult;
}
TryCalculateClustersSnap
(
    IGraph oGraph,
    SnapGraphMetrics eSnapGraphMetric,
    BackgroundWorker oBackgroundWorker,
    out ICollection<Community> oGraphMetrics
)
{
    // Calculates clusters by having the SNAP graph library write them to an
    // output file, then reads that file into a list of communities.
    //
    // oGraphMetrics always receives the list of communities, which is
    // empty if the method returns early.  The return value is false if
    // oBackgroundWorker reported a pending cancellation before the
    // calculation started, and true otherwise.  oBackgroundWorker can be
    // null, in which case no cancellation check or progress report is made.

    Debug.Assert(oGraph != null);
    AssertValid();

    LinkedList<Community> oCommunities = new LinkedList<Community>();
    oGraphMetrics = oCommunities;

    if (oBackgroundWorker != null)
    {
        if (oBackgroundWorker.CancellationPending)
        {
            return (false);
        }

        ReportProgress(1, 3, oBackgroundWorker);
    }

    // Make it easy to find the graph's vertices by vertex ID.  The key is
    // a vertex ID and the value is the corresponding vertex object.

    IVertexCollection oVertices = oGraph.Vertices;

    Dictionary<Int32, IVertex> oVertexIDDictionary =
        new Dictionary<Int32, IVertex>(oVertices.Count);

    foreach (IVertex oVertex in oVertices)
    {
        oVertexIDDictionary.Add(oVertex.ID, oVertex);
    }

    // Tell the SNAP graph library to calculate the clusters.

    String sOutputFilePath =
        CalculateSnapGraphMetrics(oGraph, eSnapGraphMetric);

    try
    {
        if (oBackgroundWorker != null)
        {
            ReportProgress(2, 3, oBackgroundWorker);
        }

        // The output file for cluster metrics has a header line followed
        // by one line for each vertex that identifies which cluster the
        // vertex is in.  The vertices are sorted by cluster.

        using (StreamReader oStreamReader =
            new StreamReader(sOutputFilePath))
        {
            String sLine = oStreamReader.ReadLine();

            Debug.Assert(sLine == "Cluster ID\tVertex ID");

            Int32 iLastClusterID = -1;
            Community oCommunity = null;

            while ((sLine = oStreamReader.ReadLine()) != null)
            {
                String[] asFields = sLine.Split('\t');

                Debug.Assert(asFields.Length == 2);

                Int32 iClusterID =
                    ParseSnapInt32GraphMetricValue(asFields, 0);

                Int32 iVertexID =
                    ParseSnapInt32GraphMetricValue(asFields, 1);

                // Start a new community on the first data line and
                // whenever the cluster ID changes.  The null check covers
                // a first cluster ID that happens to equal the initial
                // value of iLastClusterID.

                if (oCommunity == null || iClusterID != iLastClusterID)
                {
                    oCommunity = new Community();
                    oCommunity.ID = iClusterID;
                    oCommunities.AddLast(oCommunity);

                    iLastClusterID = iClusterID;
                }

                oCommunity.Vertices.Add(oVertexIDDictionary[iVertexID]);
            }
        }
    }
    finally
    {
        // Delete the output file even if reading or parsing it failed, so
        // that it is not left behind.

        File.Delete(sOutputFilePath);
    }

    return (true);
}
TryCalculateClustersWakitaTsurumi
(
    IGraph oGraph,
    BackgroundWorker oBackgroundWorker,
    out ICollection<Community> oGraphMetrics
)
{
    // Calculates clusters by starting with one community per vertex and
    // repeatedly merging the community pair that has the largest delta Q,
    // until the largest delta Q is negative or the max heap has no top
    // item.
    //
    // oGraphMetrics receives the list of communities.  The return value is
    // false if oBackgroundWorker reported a pending cancellation, and true
    // otherwise.  oBackgroundWorker can be null.

    Debug.Assert(oGraph != null);
    AssertValid();

    IVertexCollection oGraphVertices = oGraph.Vertices;
    Int32 iVertexCount = oGraphVertices.Count;
    Int32 iEdgeCount = oGraph.Edges.Count;

    IDGenerator oCommunityIDGenerator = new IDGenerator(1);

    // Start with one community per vertex.

    LinkedList<Community> oCommunityList =
        CreateCommunities(oGraphVertices, oCommunityIDGenerator);

    oGraphMetrics = oCommunityList;

    if (iVertexCount == 0 || iEdgeCount == 0)
    {
        // Nothing can be merged.

        return true;
    }

    // The max heap has an element for each community.  The key is the
    // Community and the value is the Community's maximum delta Q.

    DeltaQMaxHeap oMaxHeap = new DeltaQMaxHeap(iVertexCount);

    InitializeDeltaQs(oCommunityList, oMaxHeap, iEdgeCount);

    // Run the algorithm outlined in the Wakita/Tsurumi paper.  Each pass
    // looks at the community whose maximum delta Q is the largest of all.

    BinaryHeapItem<Community, Single> oTopItem;

    for (Int32 iMergeCycles = 0;
         oMaxHeap.TryGetTop(out oTopItem);
         iMergeCycles++)
    {
        // Check for cancellation and report progress every
        // MergeCyclesPerProgressReport merge cycles.

        Boolean bTimeToReport = (oBackgroundWorker != null) &&
            (iMergeCycles % MergeCyclesPerProgressReport == 0);

        if (bTimeToReport)
        {
            if (oBackgroundWorker.CancellationPending)
            {
                return false;
            }

            ReportProgress(iMergeCycles, iVertexCount, oBackgroundWorker);
        }

        Community oBestCommunity = oTopItem.Key;
        Single fBestDeltaQ = oTopItem.Value;

        if (fBestDeltaQ < 0)
        {
            // Merging additional communities would yield worse results,
            // so quit.

            break;
        }

        // Merge the communities in the pair with the maximum delta Q.
        // MergeCommunities() also updates the affected communities and the
        // max heap.

        CommunityPair oBestCommunityPair =
            oBestCommunity.CommunityPairWithMaximumDeltaQ;

        MergeCommunities(oCommunityList, oBestCommunityPair, oMaxHeap,
            iEdgeCount, oCommunityIDGenerator);
    }

    return true;
}