/// <summary>
/// For every cluster, determines the smallest document-to-document distance between that cluster
/// and any other cluster.
/// </summary>
/// <param name="result">Clusters whose GroupedDocument lists are compared pair by pair.</param>
/// <returns>
/// One value per cluster, index-aligned with <paramref name="result"/>. An entry stays 0 when there is
/// no other cluster to compare against.
/// </returns>
public static float[] d_min_centroids(List<Centroid> result)
{
    float[] min_dist_between_cluster_elem = new float[result.Count];

    for (int k = 0; k < result.Count; k++)
    {
        int rows = result[k].GroupedDocument.Count;
        bool has_candidate = false;
        float min_distance = 0;

        for (int k2 = 0; k2 < result.Count; k2++)
        {
            // A cluster must not be compared with itself: that would measure intra-cluster distances.
            if (k2 == k)
            {
                continue;
            }

            int cols = result[k2].GroupedDocument.Count;
            float[,] d_min_intercluster = new float[rows, cols];
            for (int i = 0; i < rows; i++)
            {
                for (int j = 0; j < cols; j++)
                {
                    d_min_intercluster[i, j] = SimilarityMatrixCalculations.FindEuclideanDistance(
                        result[k].GroupedDocument[i].VectorSpace,
                        result[k2].GroupedDocument[j].VectorSpace);
                }
            }

            // The per-pair minimum is still delegated to the existing helper so its selection rules keep applying.
            float pair_min = Find_Min_Value_in_array(d_min_intercluster, rows, cols);

            // Keep a running minimum across all partner clusters instead of overwriting it on every pass.
            if (!has_candidate || pair_min < min_distance)
            {
                min_distance = pair_min;
                has_candidate = true;
            }
        }

        min_dist_between_cluster_elem[k] = min_distance;
    }

    return min_dist_between_cluster_elem;
}
/// <summary>
/// For each cluster, adds up the Euclidean distance of every ordered pair of its documents
/// (self-pairs included) and divides that total by the number of documents in the cluster.
/// </summary>
/// <param name="result">Clusters to evaluate.</param>
/// <returns>
/// One value per cluster, index-aligned with <paramref name="result"/>. A cluster without documents
/// produces NaN, because the float division is 0 / 0.
/// </returns>
public static float[] d_sr(List<Centroid> result)
{
    float[] per_cluster = new float[result.Count];

    for (int cluster = 0; cluster < result.Count; cluster++)
    {
        var documents = result[cluster].GroupedDocument;
        int size = documents.Count;
        float total = 0.0F;

        for (int row = 0; row < size; row++)
        {
            // Each row is summed on its own first and then folded into the cluster total.
            float row_total = 0.0F;
            for (int col = 0; col < size; col++)
            {
                row_total += SimilarityMatrixCalculations.FindEuclideanDistance(
                    documents[row].VectorSpace,
                    documents[col].VectorSpace);
            }
            total += row_total;
        }

        per_cluster[cluster] = total / size;
    }

    return per_cluster;
}
/// <summary>
/// Builds, for each cluster, the matrix of Euclidean distances between all of its documents and
/// records the value Find_Max_Value_in_array returns for that matrix (by its name, the largest entry).
/// </summary>
/// <param name="result">Clusters to evaluate.</param>
/// <returns>One value per cluster, index-aligned with <paramref name="result"/>.</returns>
public static float[] d_max(List<Centroid> result)
{
    float[] max_distances = new float[result.Count];

    for (int cluster = 0; cluster < result.Count; cluster++)
    {
        var documents = result[cluster].GroupedDocument;
        int size = documents.Count;
        float[,] pairwise = new float[size, size];

        for (int row = 0; row < size; row++)
        {
            for (int col = 0; col < size; col++)
            {
                pairwise[row, col] = SimilarityMatrixCalculations.FindEuclideanDistance(
                    documents[row].VectorSpace,
                    documents[col].VectorSpace);
            }
        }

        max_distances[cluster] = Find_Max_Value_in_array(pairwise);
    }

    return max_distances;
}
/// <summary>
/// Computes the Euclidean distance between every pair of clusters, using the document at index 0
/// of each cluster's GroupedDocument list as that cluster's representative.
/// </summary>
/// <param name="result">Clusters to compare; each must contain at least one document.</param>
/// <returns>A square matrix where entry [i, j] is the distance between the representatives of clusters i and j.</returns>
public static float[,] d_centroids(List<Centroid> result)
{
    int size = result.Count;
    float[,] distances = new float[size, size];

    for (int row = 0; row < size; row++)
    {
        var row_vector = result[row].GroupedDocument[0].VectorSpace;
        for (int col = 0; col < size; col++)
        {
            var col_vector = result[col].GroupedDocument[0].VectorSpace;
            distances[row, col] = SimilarityMatrixCalculations.FindEuclideanDistance(row_vector, col_vector);
        }
    }

    return distances;
}
/// <summary>
/// Builds the square matrix of pairwise Euclidean distances between all documents and keeps it in
/// the proximity_Matrix field.
/// </summary>
/// <param name="docCollection">Documents to compare; entry [i, j] relates docCollection[i] to docCollection[j].</param>
/// <returns>The matrix held in proximity_Matrix after filling; its diagonal is 0.</returns>
public static float[,] Compute_Proximity_Matrix(List<DocumentVector> docCollection)
{
    int size = docCollection.Count;
    proximity_Matrix = new float[size, size];

    for (int row = 0; row < size; row++)
    {
        for (int col = 0; col < size; col++)
        {
            // A document is at distance zero from itself, so the diagonal is written without calling the distance helper.
            proximity_Matrix[row, col] = (row == col)
                ? 0.0F
                : SimilarityMatrixCalculations.FindEuclideanDistance(
                    docCollection[row].VectorSpace,
                    docCollection[col].VectorSpace);
        }
    }

    return proximity_Matrix;
}
/// <summary>
/// Appends every document of <paramref name="vectorSpace"/> to the cluster whose centre is nearest to it.
/// </summary>
/// <remarks>
/// The document at index 0 of each cluster's GroupedDocument list is used as that cluster's centre.
/// Every document is appended exactly once; a document that is itself a centre is therefore appended
/// again to the cluster it is nearest to. When several centres are equally near, the first one wins.
/// </remarks>
/// <param name="centroidCollection">
/// Clusters to fill. Each must already contain its centre at index 0; the GroupedDocument lists are modified in place.
/// </param>
/// <param name="vectorSpace">Documents to distribute. The list itself is not modified.</param>
/// <returns>The same list instance that was passed as <paramref name="centroidCollection"/>.</returns>
private static List<Centroid> AssignDocumentToCluster(List<Centroid> centroidCollection, List<DocumentVector> vectorSpace)
{
    // With no clusters there is nowhere to put a document; returning here also avoids looping forever.
    if (centroidCollection.Count == 0)
    {
        return centroidCollection;
    }

    // Capture the centres before anything is appended so that every document is measured
    // against the same reference points.
    var centres = new List<DocumentVector>(centroidCollection.Count);
    foreach (var cluster in centroidCollection)
    {
        centres.Add(cluster.GroupedDocument[0]);
    }

    foreach (var doc in vectorSpace)
    {
        int nearestCluster = 0;
        float nearestDistance = float.MaxValue;

        for (int c = 0; c < centres.Count; c++)
        {
            float distance = SimilarityMatrixCalculations.FindEuclideanDistance(centres[c].VectorSpace, doc.VectorSpace);

            // Strict comparison keeps the earliest cluster on ties.
            if (distance < nearestDistance)
            {
                nearestDistance = distance;
                nearestCluster = c;
            }
        }

        centroidCollection[nearestCluster].GroupedDocument.Add(doc);
    }

    return centroidCollection;
}
/// <summary>
/// Re-centres every cluster: the member document closest to the cluster's mean vector becomes the
/// cluster's only remaining document.
/// </summary>
/// <remarks>
/// The mean is computed per cluster, component by component, over the VectorSpace arrays of its members.
/// All members of one cluster are assumed to have vectors of the same length as the first member.
/// When several members are equally close to the mean, the first one is kept. A cluster without
/// documents is left untouched.
/// </remarks>
/// <param name="fillCentroidCollection">Clusters to collapse; each GroupedDocument list is modified in place.</param>
/// <param name="vectorSpace">Not consulted by the computation; retained so existing callers keep compiling.</param>
/// <returns>A new list that holds the same Centroid instances as <paramref name="fillCentroidCollection"/>.</returns>
public static List<Centroid> AverageMeansAssigned(List<Centroid> fillCentroidCollection, List<DocumentVector> vectorSpace)
{
    foreach (var cluster in fillCentroidCollection)
    {
        var members = cluster.GroupedDocument;

        // Nothing to choose a centre from.
        if (members.Count == 0)
        {
            continue;
        }

        // Component-wise mean of the cluster's own members only.
        int dimensions = members[0].VectorSpace.Length;
        float[] mean = new float[dimensions];
        foreach (var member in members)
        {
            for (int k = 0; k < dimensions; k++)
            {
                mean[k] += member.VectorSpace[k];
            }
        }
        for (int k = 0; k < dimensions; k++)
        {
            mean[k] /= members.Count;
        }

        // The search state is local to this cluster, so one cluster's result cannot influence the next.
        int closestIndex = 0;
        float closestDistance = float.MaxValue;
        for (int m = 0; m < members.Count; m++)
        {
            float distance = SimilarityMatrixCalculations.FindEuclideanDistance(mean, members[m].VectorSpace);
            if (distance < closestDistance)
            {
                closestDistance = distance;
                closestIndex = m;
            }
        }

        // Keep only the chosen document; it serves as the centre for the next assignment pass.
        DocumentVector newClusterCenter = members[closestIndex];
        members.Clear();
        members.Add(newClusterCenter);
    }

    return new List<Centroid>(fillCentroidCollection);
}
/// <summary>
/// Measures the Euclidean distance between two clusters, each represented by the document at
/// index 0 of its GroupedDocument list.
/// </summary>
/// <param name="a">First cluster; must contain at least one document.</param>
/// <param name="b">Second cluster; must contain at least one document.</param>
/// <returns>The value reported by SimilarityMatrixCalculations.FindEuclideanDistance for the two vectors.</returns>
private static float Euclidean_Distance(Centroid a, Centroid b)
{
    var first = a.GroupedDocument[0].VectorSpace;
    var second = b.GroupedDocument[0].VectorSpace;
    return SimilarityMatrixCalculations.FindEuclideanDistance(first, second);
}