/// <summary>
/// Computes a single distance figure for the sequences inside one cluster,
/// using the requested linkage rule.
/// </summary>
/// <remarks>
/// Two sequences are treated as distinct when their <c>sequenceValue</c> differs;
/// pairs with equal <c>sequenceValue</c> are skipped. Every ordered pair is visited
/// (so each unordered pair is fetched twice), exactly as before.
/// When the cluster holds no pair of distinct sequences the result is <c>0.0</c>
/// for every linkage type.
/// </remarks>
/// <param name="C">Cluster whose <c>ListOfSeq</c> (or <c>listOfMedoid</c> for MEDOIDLINK) is scanned.</param>
/// <param name="arr_input_file">Passed through unchanged to <c>Sequence.fetchDistance</c>.</param>
/// <param name="Dmatrix">Passed through unchanged to <c>Sequence.fetchDistance</c>.</param>
/// <param name="distance_type">
/// Linkage name, case-insensitive and whitespace-tolerant: SingleLink, CompleteLink,
/// AverageLink, MedoidLink or MedianLink. Null or any unrecognised value falls back to AverageLink.
/// </param>
/// <returns>
/// AVERAGELINK: mean of the pairwise distances, rounded to 3 decimals.
/// SINGLELINK: smallest pairwise distance.
/// COMPLETELINK: largest pairwise distance (never below 0.0).
/// MEDOIDLINK: largest pairwise distance between the cluster's medoids (never below 0.0).
/// MEDIANLINK: accepted but not implemented; always 0.0.
/// </returns>
public double IntraClusterDistance(Cluster C, string[] arr_input_file, double[,] Dmatrix, string distance_type)
{
    // Normalise the linkage name. ToUpperInvariant is used instead of ToUpper so the
    // comparison below does not depend on the current culture (e.g. Turkish dotted I).
    // A null name is treated like an unknown one instead of crashing.
    distance_type = (distance_type ?? string.Empty).Trim().ToUpperInvariant();

    if (distance_type != "SINGLELINK" && distance_type != "COMPLETELINK" && distance_type != "AVERAGELINK"
        && distance_type != "MEDOIDLINK" && distance_type != "MEDIANLINK")
    {
        distance_type = "AVERAGELINK";
    }

    // fetchDistance is an instance method, so a throw-away Sequence is used to call it.
    Sequence OP = new Sequence();

    switch (distance_type)
    {
        case "AVERAGELINK":
            {
                double sumOfDistances = 0.0;
                int totalPairwiseDistances = 0;

                foreach (Sequence seq1 in C.ListOfSeq)
                {
                    foreach (Sequence seq2 in C.ListOfSeq)
                    {
                        if (seq1.sequenceValue != seq2.sequenceValue)
                        {
                            sumOfDistances += OP.fetchDistance(seq1, seq2, arr_input_file, Dmatrix);
                            totalPairwiseDistances++;
                        }
                    }
                }

                // Without this guard 0.0 / 0 would yield NaN for empty or
                // single-valued clusters.
                if (totalPairwiseDistances == 0)
                {
                    return 0.0;
                }

                return Math.Round(sumOfDistances / totalPairwiseDistances, 3);
            }

        case "SINGLELINK":
            {
                // Start from +infinity: seeding the minimum with 0.0 would make the
                // "d < min" test unreachable for non-negative distances.
                double smallest = double.PositiveInfinity;
                bool anyPair = false;

                foreach (Sequence seq1 in C.ListOfSeq)
                {
                    foreach (Sequence seq2 in C.ListOfSeq)
                    {
                        if (seq1.sequenceValue != seq2.sequenceValue)
                        {
                            double d = OP.fetchDistance(seq1, seq2, arr_input_file, Dmatrix);
                            if (d < smallest)
                            {
                                smallest = d;
                            }
                            anyPair = true;
                        }
                    }
                }

                return anyPair ? smallest : 0.0;
            }

        case "COMPLETELINK":
            {
                double largest = 0.0;

                foreach (Sequence seq1 in C.ListOfSeq)
                {
                    foreach (Sequence seq2 in C.ListOfSeq)
                    {
                        if (seq1.sequenceValue != seq2.sequenceValue)
                        {
                            double d = OP.fetchDistance(seq1, seq2, arr_input_file, Dmatrix);
                            if (d > largest)
                            {
                                largest = d;
                            }
                        }
                    }
                }

                return largest;
            }

        case "MEDOIDLINK":
            {
                // A cluster may have several medoids; the two that are farthest
                // apart determine the result.
                double largest = 0.0;

                foreach (Sequence seq1 in C.listOfMedoid)
                {
                    foreach (Sequence seq2 in C.listOfMedoid)
                    {
                        if (seq1.sequenceValue != seq2.sequenceValue)
                        {
                            double d = OP.fetchDistance(seq1, seq2, arr_input_file, Dmatrix);
                            if (d > largest)
                            {
                                largest = d;
                            }
                        }
                    }
                }

                return largest;
            }

        default:
            // Only MEDIANLINK reaches this point: it passes validation but has no
            // implementation, so the historical result of 0.0 is kept.
            return 0.0;
    }
}
/// <summary>
/// Finds the medoid(s) of a cluster: the sequence(s) whose summed distance to all
/// other sequences in the cluster is the smallest.
/// </summary>
/// <remarks>
/// As a side effect, every sequence in <c>C.ListOfSeq</c> gets its
/// <c>contributionToIntraClusterDistance</c> overwritten with its summed distance
/// to the other sequences (pairs with equal <c>sequenceValue</c> are skipped).
/// Sequences are scanned in list order, so the returned medoids follow the order
/// of <c>C.ListOfSeq</c>. Sequences sharing the same <c>sequenceValue</c> no longer
/// cause an exception; each qualifying list entry is returned.
/// </remarks>
/// <param name="C">Cluster whose <c>ListOfSeq</c> is examined.</param>
/// <param name="arr_input_file">Passed through unchanged to <c>Sequence.fetchDistance</c>.</param>
/// <param name="Dmatrix">Passed through unchanged to <c>Sequence.fetchDistance</c>.</param>
/// <returns>
/// All sequences tied for the minimum summed distance; an empty list when the
/// cluster contains no sequences.
/// </returns>
public List<Sequence> ComputeMedoid(Cluster C, string[] arr_input_file, double[,] Dmatrix)
{
    List<Sequence> medoids = new List<Sequence>();

    // fetchDistance is an instance method, so a throw-away Sequence is used to call it.
    Sequence OP = new Sequence();

    double minDistance = 0.0;
    bool haveMinimum = false;

    // Pass 1: compute each sequence's summed distance to the rest of the cluster
    // and track the smallest sum seen so far.
    foreach (Sequence seq1 in C.ListOfSeq)
    {
        double summedDistance = 0.0;

        foreach (Sequence seq2 in C.ListOfSeq)
        {
            if (seq1.sequenceValue != seq2.sequenceValue)
            {
                summedDistance += OP.fetchDistance(seq1, seq2, arr_input_file, Dmatrix);
            }
        }

        seq1.contributionToIntraClusterDistance = summedDistance;

        // The first sequence seeds the minimum; afterwards only a strictly smaller
        // sum replaces it. (The previous "first time" flag was never cleared, so the
        // minimum ended up being whichever entry happened to be visited last.)
        if (!haveMinimum || summedDistance < minDistance)
        {
            minDistance = summedDistance;
            haveMinimum = true;
        }
    }

    // Pass 2: collect every sequence tied for the minimum. Exact equality is
    // intended here: minDistance is a copy of one of the stored values, not a
    // recomputed number.
    foreach (Sequence candidate in C.ListOfSeq)
    {
        if (candidate.contributionToIntraClusterDistance == minDistance)
        {
            medoids.Add(candidate);
        }
    }

    return medoids;
}