/// <summary>
/// Keeps only the clusters that hold at least <c>alpha * data.Count</c> documents.
/// </summary>
/// <param name="clusters">Candidate clusters; each one's <c>GroupedDocument</c> list is read, never modified.</param>
/// <param name="alpha">Fraction of the total document count a cluster must reach to be kept.</param>
/// <param name="data">Full document collection; only its <c>Count</c> is used, to derive the threshold.</param>
/// <returns>
/// A new list with one new <c>TestCentroid</c> per qualifying cluster. Each returned centroid owns a
/// fresh <c>GroupedDocument</c> list that references the same document instances as the source cluster.
/// </returns>
public static List<TestCentroid> GetClustersTest(List<TestCentroid> clusters, float alpha, List<DocumentVectorTest> data)
{
    List<TestCentroid> newclusters = new List<TestCentroid>();
    float minPoints = alpha * data.Count;

    foreach (TestCentroid cluster in clusters)
    {
        List<DocumentVectorTest> members = cluster.GroupedDocument;

        // Empty clusters never yield an entry, even when the threshold itself is zero.
        // The threshold test is written in its positive form so that a NaN threshold rejects everything.
        if (members.Count > 0 && members.Count >= minPoints)
        {
            // Append the copy exactly once per qualifying cluster (not once per contained document).
            newclusters.Add(new TestCentroid { GroupedDocument = new List<DocumentVectorTest>(members) });
        }
    }

    return newclusters;
}
/// <summary>
/// Resets the static <c>parent</c> and <c>rank</c> arrays to one slot per document and wraps every
/// document in its own single-element centroid.
/// </summary>
/// <param name="docCollection">Documents to wrap; the list itself is not modified.</param>
/// <returns>
/// A tuple of the freshly assigned <c>parent</c> array (each slot holds its own index), the freshly
/// assigned <c>rank</c> array (all zeros), and the list of single-document centroids in input order.
/// </returns>
public static Tuple<int[], int[], List<TestCentroid>> Set(List<DocumentVectorTest> docCollection)
{
    int documentCount = docCollection.Count;

    parent = new int[documentCount];
    rank = new int[documentCount];
    for (int slot = 0; slot < documentCount; slot++)
    {
        // Every document starts out as its own parent with rank zero.
        parent[slot] = slot;
        rank[slot] = 0;
    }

    // NOTE(review): an earlier developer note here reported the resulting set holding 23 entries where
    // 46 were expected. This method emits exactly one centroid per element of docCollection, so verify
    // the size of the collection the caller passes in.
    List<TestCentroid> singletonCentroids = new List<TestCentroid>();
    foreach (DocumentVectorTest document in docCollection)
    {
        TestCentroid singleton = new TestCentroid
        {
            GroupedDocument = new List<DocumentVectorTest>()
        };
        singleton.GroupedDocument.Add(document);
        singletonCentroids.Add(singleton);
    }

    return new Tuple<int[], int[], List<TestCentroid>>(parent, rank, singletonCentroids);
}
/// <summary>
/// Builds the initial centroids for k-means by picking <paramref name="ClusterNumber"/> distinct
/// documents uniformly at random.
/// </summary>
/// <param name="data">Documents to draw the initial centroids from.</param>
/// <param name="ClusterNumber">Number of centroids to create; must lie between 0 and <c>data.Count</c>.</param>
/// <returns>
/// A list of <paramref name="ClusterNumber"/> centroids. <c>CalculateMeans()</c> has been called on each
/// while it held its single seed document, and its <c>GroupedDocument</c> list has been cleared afterwards.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="ClusterNumber"/> is negative or larger than the number of documents, so that many
/// distinct seeds cannot be drawn.
/// </exception>
public static List<TestCentroid> CentroidCalculationsForKMeans(List<DocumentVectorTest> data, int ClusterNumber)
{
    // Without this guard the sampling loop below could never reach the requested count.
    if (ClusterNumber < 0 || ClusterNumber > data.Count)
    {
        throw new ArgumentOutOfRangeException(
            "ClusterNumber",
            ClusterNumber,
            "The number of clusters must be between 0 and the number of documents.");
    }

    List<TestCentroid> centroidList = new List<TestCentroid>(ClusterNumber);
    Random randomizer = new Random();
    HashSet<int> usedIndexes = new HashSet<int>();

    while (centroidList.Count != ClusterNumber)
    {
        // The upper bound of Random.Next is exclusive, so this yields a valid index 0 .. data.Count - 1.
        int index = randomizer.Next(data.Count);

        // HashSet.Add returns false when the index was drawn before; just draw again in that case.
        if (!usedIndexes.Add(index))
        {
            continue;
        }

        TestCentroid newCentroid = new TestCentroid();
        newCentroid.GroupedDocument = new List<DocumentVectorTest>();
        newCentroid.GroupedDocument.Add(data[index]);
        centroidList.Add(newCentroid);
    }

    foreach (TestCentroid centroid in centroidList)
    {
        // Presumably CalculateMeans derives the centroid's mean from its grouped documents (here: the
        // single seed) - TODO confirm against TestCentroid. The seed is then dropped from the group.
        centroid.CalculateMeans();
        centroid.GroupedDocument.Clear();
    }

    return centroidList;
}
/// <summary>
/// Computes, for every document in <paramref name="vSpace"/>, its squared Euclidean distance to the
/// first document of <paramref name="oldCentroid"/>, normalised so that the values sum to one.
/// </summary>
/// <param name="oldCentroid">Reference centroid; only <c>GroupedDocument[0].VectorSpace</c> is read.</param>
/// <param name="vSpace">Documents to measure; the list is not modified.</param>
/// <returns>
/// An array parallel to <paramref name="vSpace"/> holding each document's share of the total squared
/// distance. When the total is exactly zero (every document coincides with the reference, or the list
/// is empty) the array is returned with all zeros instead of NaN values.
/// </returns>
public static float[] CalculateProbabilityArray_Test(TestCentroid oldCentroid, List<DocumentVectorTest> vSpace)
{
    float[] reference = oldCentroid.GroupedDocument[0].VectorSpace;
    float[] shares = new float[vSpace.Count];
    float total = 0;

    for (int j = 0; j < shares.Length; j++)
    {
        float[] candidate = vSpace[j].VectorSpace;
        float squaredDistance = 0;

        // The candidate's length drives the loop, so the reference vector must be at least as long.
        for (int k = 0; k < candidate.Length; k++)
        {
            float difference = reference[k] - candidate[k];
            squaredDistance += difference * difference;
        }

        shares[j] = squaredDistance;
        total += squaredDistance;
    }

    // Dividing by a zero total would turn every entry into NaN; keep the all-zero array instead.
    if (total == 0f)
    {
        return shares;
    }

    for (int j = 0; j < shares.Length; j++)
    {
        shares[j] = shares[j] / total;
    }

    return shares;
}
/// <summary>
/// Creates <paramref name="clusterNumber"/> centroids, each with an empty document group.
/// </summary>
/// <param name="clusterNumber">How many centroids to create; zero or a negative value yields an empty list.</param>
/// <returns>A new list containing the freshly created centroids.</returns>
public static List<TestCentroid> CreateClusterSet(int clusterNumber)
{
    var clusterSet = new List<TestCentroid>();

    // Keep appending until the list has reached the requested size.
    while (clusterSet.Count < clusterNumber)
    {
        clusterSet.Add(new TestCentroid { GroupedDocument = new List<DocumentVectorTest>() });
    }

    return clusterSet;
}
/// <summary>
/// Builds a list, pre-sized to <paramref name="numClusters"/>, of centroids whose document groups are empty.
/// </summary>
/// <param name="numClusters">
/// Number of centroids to create. It is also passed as the list's initial capacity, so a negative
/// value makes the list constructor throw.
/// </param>
/// <returns>The list of newly created centroids.</returns>
private static List<TestCentroid> TestCentroidInitializer(int numClusters)
{
    var centroids = new List<TestCentroid>(numClusters);

    for (int remaining = numClusters; remaining > 0; remaining--)
    {
        var emptyCentroid = new TestCentroid
        {
            GroupedDocument = new List<DocumentVectorTest>()
        };
        centroids.Add(emptyCentroid);
    }

    return centroids;
}
// One generator shared by all calls. On runtimes where the parameterless Random constructor seeds from
// the clock, instances created in quick succession can produce identical sequences, which would make
// consecutive centroid draws identical. Random is not thread-safe, hence the dedicated lock.
private static readonly Random nextCentroidRandom = new Random();
private static readonly object nextCentroidRandomLock = new object();

/// <summary>
/// Picks the next centroid seed from <paramref name="vSpace"/> at random, with each document's chance
/// equal to the weight <c>CalculateProbabilityArray_Test</c> assigns it relative to
/// <paramref name="firstcentroid"/> (roulette-wheel selection).
/// </summary>
/// <param name="firstcentroid">Centroid the weights are measured against.</param>
/// <param name="vSpace">Candidate documents; the list is not modified.</param>
/// <returns>A new centroid whose <c>GroupedDocument</c> holds exactly the selected document.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="vSpace"/> contains no documents.</exception>
private static TestCentroid Calculate_Next_Centroid_Test(TestCentroid firstcentroid, List<DocumentVectorTest> vSpace)
{
    if (vSpace.Count == 0)
    {
        throw new ArgumentOutOfRangeException("vSpace", "At least one candidate document is required.");
    }

    float[] weights = CalculateProbabilityArray_Test(firstcentroid, vSpace);

    double threshold;
    lock (nextCentroidRandomLock)
    {
        threshold = nextCentroidRandom.NextDouble();
    }

    // Walk the running total of the weights and stop at the first candidate that pushes it past the
    // random threshold. If no weight is positive (all zero or NaN) the first document is used.
    int selectedIndex = 0;
    double cumulative = 0.0;
    for (int i = 0; i < weights.Length; i++)
    {
        // Skips zero weights and NaN alike: such candidates must never be selected by the draw.
        if (!(weights[i] > 0))
        {
            continue;
        }

        // Remember the latest selectable candidate; it is the fallback when rounding keeps the
        // running total marginally below the threshold.
        selectedIndex = i;
        cumulative += weights[i];
        if (threshold < cumulative)
        {
            break;
        }
    }

    TestCentroid next_centroid = new TestCentroid();
    next_centroid.GroupedDocument = new List<DocumentVectorTest>();
    next_centroid.GroupedDocument.Add(vSpace[selectedIndex]);
    return next_centroid;
}
/// <summary>
/// Chooses <paramref name="ClusterNumberPP"/> initial centroids: the first uniformly at random, every
/// further one via <c>Calculate_Next_Centroid_Test</c> measured against the most recently chosen centroid.
/// </summary>
/// <param name="dataPP">Documents to choose from; the list itself is not modified.</param>
/// <param name="ClusterNumberPP">Number of centroids to return; must lie between 1 and <c>dataPP.Count</c>.</param>
/// <returns>
/// The chosen centroids in selection order, starting with the random first one. Each centroid's
/// <c>GroupedDocument</c> holds exactly its seed document.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="ClusterNumberPP"/> is below 1 or above the number of documents. The same exception
/// type surfaces from <c>Calculate_Next_Centroid_Test</c> if the candidates run out before enough
/// distinct documents were found.
/// </exception>
public static List<TestCentroid> CentroidCalculationsForTestKMeansPP(List<DocumentVectorTest> dataPP, int ClusterNumberPP)
{
    if (ClusterNumberPP < 1 || ClusterNumberPP > dataPP.Count)
    {
        throw new ArgumentOutOfRangeException(
            "ClusterNumberPP",
            ClusterNumberPP,
            "The number of clusters must be between 1 and the number of documents.");
    }

    List<TestCentroid> centroidListPP = new List<TestCentroid>(ClusterNumberPP);
    List<DocumentVectorTest> remainingCandidates = new List<DocumentVectorTest>(dataPP);
    List<DocumentVectorTest> existingCentroids = new List<DocumentVectorTest>();
    Random randomizerPP = new Random();

    // Seed: one document chosen uniformly at random (the upper bound of Next is exclusive).
    DocumentVectorTest seedDocument = dataPP[randomizerPP.Next(0, dataPP.Count)];
    TestCentroid firstCentroid = new TestCentroid();
    firstCentroid.GroupedDocument = new List<DocumentVectorTest>();
    firstCentroid.GroupedDocument.Add(seedDocument);
    centroidListPP.Add(firstCentroid);

    // Register the seed like every later pick so it stays in the result and cannot be drawn again.
    existingCentroids.Add(seedDocument);
    remainingCandidates.Remove(seedDocument);

    TestCentroid previousCentroid = firstCentroid;
    while (centroidListPP.Count != ClusterNumberPP)
    {
        TestCentroid newCentroid = Calculate_Next_Centroid_Test(previousCentroid, remainingCandidates);
        DocumentVectorTest pickedDocument = newCentroid.GroupedDocument[0];

        // Drop the pick from the candidates whether or not it is accepted, so every iteration makes
        // progress even when the collection contains documents that compare equal.
        remainingCandidates.Remove(pickedDocument);

        if (existingCentroids.Contains(pickedDocument))
        {
            continue;
        }

        existingCentroids.Add(pickedDocument);
        centroidListPP.Add(newCentroid);
        previousCentroid = newCentroid;
    }

    return centroidListPP;
}
/// <summary>
/// Groups documents into centroids according to a per-document label array.
/// </summary>
/// <param name="clusters">
/// Label per document: <c>clusters[i]</c> is the label of <c>data[i]</c>. Each distinct label is also used
/// as an index into <paramref name="data"/> to fetch that group's leading document, so every label must
/// be a valid index. Must contain at least <c>data.Count</c> entries.
/// </param>
/// <param name="alpha">Accepted for signature compatibility; not used - no minimum-size filtering is applied here.</param>
/// <param name="data">Documents to group.</param>
/// <returns>
/// One centroid per distinct label, in order of first appearance. Each centroid's <c>GroupedDocument</c>
/// starts with <c>data[label]</c>, followed by every document carrying that label in ascending index order.
/// </returns>
public static List<TestCentroid> GetClustersTest(int[] clusters, float alpha, List<DocumentVectorTest> data)
{
    int documentCount = data.Count;

    // Pass 1: collect the distinct labels in first-seen order and give each a position in the result.
    List<int> labels = new List<int>();
    Dictionary<int, int> positionByLabel = new Dictionary<int, int>();
    for (int i = 0; i < documentCount; i++)
    {
        int label = clusters[i];
        if (!positionByLabel.ContainsKey(label))
        {
            positionByLabel.Add(label, labels.Count);
            labels.Add(label);
        }
    }

    // Pass 2: one centroid per label, led by the document the label points at.
    // NOTE(review): when clusters[label] == label that document is appended again in pass 3 and so
    // appears twice in its group; this is kept as-is - confirm whether it is intended.
    List<TestCentroid> centroidSet = new List<TestCentroid>(labels.Count);
    foreach (int label in labels)
    {
        TestCentroid centroid = new TestCentroid { GroupedDocument = new List<DocumentVectorTest>() };
        centroid.GroupedDocument.Add(data[label]);
        centroidSet.Add(centroid);
    }

    // Pass 3: append every document to the centroid of its label (one dictionary lookup per document).
    for (int i = 0; i < documentCount; i++)
    {
        centroidSet[positionByLabel[clusters[i]]].GroupedDocument.Add(data[i]);
    }

    return centroidSet;
}