/// <summary>
/// For every cluster, computes the smallest distance between its documents and
/// the documents of any other cluster.
/// </summary>
/// <param name="result">Clusters whose grouped documents are compared.</param>
/// <returns>
/// One value per cluster, in the order of <paramref name="result"/>. An entry
/// stays 0 when there is no other cluster to compare against.
/// </returns>
public static float[] d_min_centroids(List <Centroid> result)
        {
            float[] min_dist_between_cluster_elem = new float[result.Count];

            for (int k = 0; k < result.Count; k++)
            {
                int  rows            = result[k].GroupedDocument.Count;
                bool has_other_value = false;

                // Start at 0 so that cluster 0 also serves as a comparison partner.
                for (int k2 = 0; k2 < result.Count; k2++)
                {
                    // Comparing a cluster with itself pairs each document with
                    // itself, which would presumably force the minimum to zero.
                    if (k2 == k)
                    {
                        continue;
                    }

                    int cols = result[k2].GroupedDocument.Count;
                    float[,] d_min_intercluster = new float[rows, cols];
                    for (int i = 0; i < rows; i++)
                    {
                        for (int j = 0; j < cols; j++)
                        {
                            d_min_intercluster[i, j] = SimilarityMatrixCalculations.FindEuclideanDistance(result[k].GroupedDocument[i].VectorSpace, result[k2].GroupedDocument[j].VectorSpace);
                        }
                    }

                    // NOTE(review): Find_Min_Value_in_array is defined elsewhere; it is
                    // assumed to return the smallest entry of the rows x cols matrix.
                    float pair_min = Find_Min_Value_in_array(d_min_intercluster, rows, cols);

                    // Keep the minimum over all other clusters instead of letting the
                    // last compared cluster overwrite earlier results.
                    if (!has_other_value || pair_min < min_dist_between_cluster_elem[k])
                    {
                        min_dist_between_cluster_elem[k] = pair_min;
                        has_other_value = true;
                    }
                }
            }

            return(min_dist_between_cluster_elem);
        }
Exemple #2
0
        /// <summary>
        /// For each cluster, sums the distances between every ordered pair of its
        /// documents (self-pairs included) and divides that sum by the number of
        /// documents in the cluster.
        /// </summary>
        /// <param name="result">Clusters to evaluate.</param>
        /// <returns>One value per cluster, in the order of <paramref name="result"/>.</returns>
        public static float[] d_sr(List <Centroid> result)
        {
            float[] per_cluster = new float[result.Count];

            for (int cluster = 0; cluster < result.Count; cluster++)
            {
                var documents = result[cluster].GroupedDocument;
                int size      = documents.Count;

                float cluster_total = 0.0F;
                for (int row = 0; row < size; row++)
                {
                    // Total distance from document `row` to every document of the cluster.
                    float row_total = 0.0F;
                    for (int col = 0; col < size; col++)
                    {
                        row_total += SimilarityMatrixCalculations.FindEuclideanDistance(documents[row].VectorSpace, documents[col].VectorSpace);
                    }
                    cluster_total += row_total;
                }

                // Float division: an empty cluster produces NaN rather than throwing.
                per_cluster[cluster] = cluster_total / size;
            }

            return per_cluster;
        }
Exemple #3
0
        /// <summary>
        /// Builds, for each cluster, the matrix of distances between all pairs of its
        /// documents and records the value Find_Max_Value_in_array reports for it.
        /// </summary>
        /// <param name="result">Clusters to evaluate.</param>
        /// <returns>One value per cluster, in the order of <paramref name="result"/>.</returns>
        public static float[] d_max(List <Centroid> result)
        {
            float[] per_cluster_max = new float[result.Count];

            for (int cluster = 0; cluster < result.Count; cluster++)
            {
                var documents = result[cluster].GroupedDocument;
                int size      = documents.Count;

                float[,] pairwise = new float[size, size];
                for (int row = 0; row < size; row++)
                {
                    for (int col = 0; col < size; col++)
                    {
                        pairwise[row, col] = SimilarityMatrixCalculations.FindEuclideanDistance(documents[row].VectorSpace, documents[col].VectorSpace);
                    }
                }

                // NOTE(review): the helper is defined elsewhere; presumably it returns
                // the largest entry of the matrix.
                per_cluster_max[cluster] = Find_Max_Value_in_array(pairwise);
            }

            return per_cluster_max;
        }
        /// <summary>
        /// Builds the square matrix of distances between clusters, using the first
        /// grouped document of each cluster as its representative.
        /// </summary>
        /// <param name="result">Clusters; each must hold at least one grouped document.</param>
        /// <returns>A matrix where entry [i, j] is the distance between clusters i and j.</returns>
        public static float[,] d_centroids(List <Centroid> result)
        {
            int cluster_count = result.Count;
            float[,] distances = new float[cluster_count, cluster_count];

            for (int row = 0; row < cluster_count; row++)
            {
                for (int col = 0; col < cluster_count; col++)
                {
                    var from = result[row].GroupedDocument[0].VectorSpace;
                    var to   = result[col].GroupedDocument[0].VectorSpace;
                    distances[row, col] = SimilarityMatrixCalculations.FindEuclideanDistance(from, to);
                }
            }

            return distances;
        }
Exemple #5
0
 /// <summary>
 /// Computes the pairwise distance matrix for a document collection.
 /// The matrix is stored in the proximity_Matrix member (declared outside this
 /// method) and also returned; diagonal entries are zero.
 /// </summary>
 /// <param name="docCollection">Documents whose vectors are compared.</param>
 /// <returns>A square matrix where entry [i, j] is the distance between documents i and j.</returns>
 public static float[,] Compute_Proximity_Matrix(List <DocumentVector> docCollection)
 {
     int size = docCollection.Count;

     proximity_Matrix = new float[size, size];
     for (int row = 0; row < size; row++)
     {
         for (int col = 0; col < size; col++)
         {
             // A document is at distance zero from itself; no need to call the helper.
             proximity_Matrix[row, col] = (row == col)
                 ? 0.0F
                 : SimilarityMatrixCalculations.FindEuclideanDistance(docCollection[row].VectorSpace, docCollection[col].VectorSpace);
         }
     }

     return proximity_Matrix;
 }
        /// <summary>
        /// Adds every document to the cluster whose center is nearest to it.
        /// </summary>
        /// <remarks>
        /// The first grouped document of each centroid is used as that cluster's
        /// center. The centroid objects are modified in place (documents are appended
        /// to their GroupedDocument lists); <paramref name="vectorSpace"/> itself is
        /// not modified. Every document is appended exactly once, including a
        /// document that is identical to a center.
        /// </remarks>
        /// <param name="centroidCollection">Clusters, each holding its center as first grouped document.</param>
        /// <param name="vectorSpace">Documents to distribute over the clusters.</param>
        /// <returns>The same list instance that was passed as <paramref name="centroidCollection"/>.</returns>
        private static List <Centroid> AssignDocumentToCluster(List <Centroid> centroidCollection, List <DocumentVector> vectorSpace)
        {
            // Without clusters there is nothing to assign to. The previous
            // implementation looped forever in this situation.
            if (centroidCollection.Count == 0)
            {
                return(centroidCollection);
            }

            // Iterate over a snapshot so that appending to the clusters can never
            // interfere with the enumeration of the input list.
            List <DocumentVector> documents = new List <DocumentVector>(vectorSpace);

            foreach (var doc in documents)
            {
                int   nearestCluster  = 0;
                float nearestDistance = 0.0F;

                for (int c = 0; c < centroidCollection.Count; c++)
                {
                    // Index 0 stays the center because new documents are appended.
                    float distance = SimilarityMatrixCalculations.FindEuclideanDistance(centroidCollection[c].GroupedDocument[0].VectorSpace, doc.VectorSpace);

                    // Strict comparison: on a tie the earlier cluster wins.
                    if (c == 0 || distance < nearestDistance)
                    {
                        nearestDistance = distance;
                        nearestCluster  = c;
                    }
                }

                centroidCollection[nearestCluster].GroupedDocument.Add(doc);
            }

            return(centroidCollection);
        }
        /// <summary>
        /// Picks a new center for every cluster: the grouped document that lies
        /// closest to the mean vector of the cluster's documents. Afterwards each
        /// cluster contains only that document.
        /// </summary>
        /// <remarks>
        /// The centroid objects are modified in place. Clusters without documents are
        /// left untouched. All documents of one cluster are assumed to have vectors
        /// of the same length as the cluster's first document.
        /// </remarks>
        /// <param name="fillCentroidCollection">Clusters with their assigned documents.</param>
        /// <param name="vectorSpace">Not used by the computation; kept so existing callers keep compiling.</param>
        /// <returns>A new list holding the same centroid objects, each reduced to its new center.</returns>
        public static List <Centroid> AverageMeansAssigned(List <Centroid> fillCentroidCollection, List <DocumentVector> vectorSpace)
        {
            foreach (var cluster in fillCentroidCollection)
            {
                var members = cluster.GroupedDocument;
                if (members.Count == 0)
                {
                    // Nothing to average and no document that could become the center.
                    continue;
                }

                // Mean vector of this cluster only; it is rebuilt for every cluster
                // so that values never leak from one cluster into the next.
                int     dimension = members[0].VectorSpace.Length;
                float[] mean      = new float[dimension];
                foreach (var member in members)
                {
                    for (int d = 0; d < dimension; d++)
                    {
                        mean[d] += member.VectorSpace[d];
                    }
                }
                for (int d = 0; d < dimension; d++)
                {
                    mean[d] /= members.Count;
                }

                // Find the document nearest to the mean. The search starts from the
                // current center (index 0) and uses a strict comparison, so the
                // current center is kept whenever no other document is closer.
                int   closestIndex    = 0;
                float closestDistance = SimilarityMatrixCalculations.FindEuclideanDistance(mean, members[0].VectorSpace);
                for (int m = 1; m < members.Count; m++)
                {
                    float distance = SimilarityMatrixCalculations.FindEuclideanDistance(mean, members[m].VectorSpace);
                    if (distance < closestDistance)
                    {
                        closestDistance = distance;
                        closestIndex    = m;
                    }
                }

                // Reduce the cluster to its new center.
                DocumentVector newClusterCenter = members[closestIndex];
                members.Clear();
                members.Add(newClusterCenter);
            }

            return(new List <Centroid>(fillCentroidCollection));
        }
Exemple #8
0
 /// <summary>
 /// Distance between two centroids, measured between the vectors of their
 /// first grouped documents.
 /// </summary>
 /// <param name="a">First centroid; must hold at least one grouped document.</param>
 /// <param name="b">Second centroid; must hold at least one grouped document.</param>
 /// <returns>The value returned by SimilarityMatrixCalculations.FindEuclideanDistance for the two vectors.</returns>
 private static float Euclidean_Distance(Centroid a, Centroid b)
 {
     var first  = a.GroupedDocument[0].VectorSpace;
     var second = b.GroupedDocument[0].VectorSpace;

     return SimilarityMatrixCalculations.FindEuclideanDistance(first, second);
 }