예제 #1
0
        /// <summary>
        /// Filters a set of clusters, keeping only those whose document count is at
        /// least <c>alpha * data.Count</c>.
        /// </summary>
        /// <param name="clusters">Candidate clusters to filter.</param>
        /// <param name="alpha">Fraction of the total document count a cluster must reach to be kept.</param>
        /// <param name="data">Full document collection; only its count is read.</param>
        /// <returns>
        /// A new list holding exactly one new centroid per retained cluster. Each
        /// centroid gets its own copy of the retained cluster's document list.
        /// </returns>
        public static List <TestCentroid> GetClustersTest(List <TestCentroid> clusters, float alpha, List <DocumentVectorTest> data)
        {
            List <TestCentroid> newclusters = new List <TestCentroid>();
            float minPoints = alpha * data.Count;

            foreach (TestCentroid cluster in clusters)
            {
                List <DocumentVectorTest> members = cluster.GroupedDocument;

                // Empty clusters are never emitted, even when the threshold is zero or negative.
                if (members.Count == 0 || !(members.Count >= minPoints))
                {
                    continue;
                }

                // Add the centroid once per cluster. Adding it inside the per-document
                // loop would put the same centroid into the result once per document.
                newclusters.Add(new TestCentroid
                {
                    GroupedDocument = new List <DocumentVectorTest>(members)
                });
            }
            return(newclusters);
        }
예제 #2
0
        /// <summary>
        /// Resets the <c>parent</c> and <c>rank</c> arrays to one entry per document
        /// and wraps every document in its own single-document centroid.
        /// </summary>
        /// <param name="docCollection">Documents to initialise the structures for.</param>
        /// <returns>
        /// A tuple of the freshly assigned <c>parent</c> array (each entry equal to
        /// its own index), the <c>rank</c> array (all zeros) and the list of
        /// single-document centroids, in document order.
        /// </returns>
        public static Tuple <int[], int[], List <TestCentroid> > Set(List <DocumentVectorTest> docCollection)
        {
            int documentCount = docCollection.Count;

            parent = new int[documentCount];
            rank   = new int[documentCount];

            var singletons = new List <TestCentroid>();

            for (int docIndex = 0; docIndex < documentCount; docIndex++)
            {
                // Every document starts as its own parent with rank zero.
                parent[docIndex] = docIndex;
                rank[docIndex]   = 0;

                var singleton = new TestCentroid
                {
                    GroupedDocument = new List <DocumentVectorTest> { docCollection[docIndex] }
                };
                singletons.Add(singleton);
            }

            return new Tuple <int[], int[], List <TestCentroid> >(parent, rank, singletons);
        }
예제 #3
0
        /// <summary>
        /// Picks <paramref name="ClusterNumber"/> distinct documents at random and
        /// turns each into an initial centroid.
        /// </summary>
        /// <param name="data">Documents to draw the initial centroids from.</param>
        /// <param name="ClusterNumber">Number of centroids to create; must be between 0 and <c>data.Count</c>.</param>
        /// <returns>
        /// The created centroids. <c>CalculateMeans()</c> has been called on each one
        /// while it held its seed document, and its document list has then been cleared.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="ClusterNumber"/> is negative or larger than the number of
        /// documents, so that many distinct seeds cannot be drawn.
        /// </exception>
        public static List <TestCentroid> CentroidCalculationsForKMeans(List <DocumentVectorTest> data, int ClusterNumber)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }
            if (ClusterNumber < 0 || ClusterNumber > data.Count)
            {
                // Without this check the rejection-sampling loop below could never finish.
                throw new ArgumentOutOfRangeException("ClusterNumber", "ClusterNumber must be between 0 and the number of documents.");
            }

            List <TestCentroid> centroidList = new List <TestCentroid>(ClusterNumber);
            Random        randomizer         = new Random();
            HashSet <int> usedIndexes        = new HashSet <int>();

            while (centroidList.Count < ClusterNumber)
            {
                // The upper bound of Random.Next is exclusive, so data.Count keeps the
                // index inside the list (data.Count + 1 could produce data.Count itself).
                int index = randomizer.Next(0, data.Count);

                // HashSet.Add returns false when the index was already drawn.
                if (!usedIndexes.Add(index))
                {
                    continue;
                }

                TestCentroid newCentroid = new TestCentroid();
                newCentroid.GroupedDocument = new List <DocumentVectorTest>();
                newCentroid.GroupedDocument.Add(data[index]);
                centroidList.Add(newCentroid);
            }

            foreach (TestCentroid centroid in centroidList)
            {
                // Compute the means from the seed document, then empty the group.
                centroid.CalculateMeans();
                centroid.GroupedDocument.Clear();
            }
            return(centroidList);
        }
예제 #4
0
        /// <summary>
        /// Computes, for every document in <paramref name="vSpace"/>, its squared
        /// Euclidean distance to the first document of <paramref name="oldCentroid"/>,
        /// normalised so the values sum to one.
        /// </summary>
        /// <param name="oldCentroid">Centroid whose first grouped document is the reference point.</param>
        /// <param name="vSpace">Documents to weight; the list is only read.</param>
        /// <returns>
        /// One weight per document, in the order of <paramref name="vSpace"/>. When
        /// every document has zero distance to the reference, all weights are equal
        /// (a uniform distribution) rather than the NaN values a 0/0 division would give.
        /// An empty <paramref name="vSpace"/> yields an empty array.
        /// </returns>
        public static float[] CalculateProbabilityArray_Test(TestCentroid oldCentroid, List <DocumentVectorTest> vSpace)
        {
            float[] referenceVector = oldCentroid.GroupedDocument[0].VectorSpace;
            float[] weights         = new float[vSpace.Count];
            float   totalDistance   = 0;

            for (int j = 0; j < weights.Length; j++)
            {
                float[] candidateVector = vSpace[j].VectorSpace;
                float   squaredDistance = 0;

                for (int k = 0; k < candidateVector.Length; k++)
                {
                    float difference = referenceVector[k] - candidateVector[k];
                    squaredDistance += difference * difference;
                }

                weights[j]     = squaredDistance;
                totalDistance += squaredDistance;
            }

            if (totalDistance <= 0)
            {
                // A sum of squares is zero only when every distance is zero; dividing
                // would produce NaN, so fall back to equal weights.
                for (int j = 0; j < weights.Length; j++)
                {
                    weights[j] = 1.0F / weights.Length;
                }
                return(weights);
            }

            for (int j = 0; j < weights.Length; j++)
            {
                weights[j] = weights[j] / totalDistance;
            }
            return(weights);
        }
예제 #5
0
        /// <summary>
        /// Builds a list of empty centroids.
        /// </summary>
        /// <param name="clusterNumber">How many centroids to create; zero or less gives an empty list.</param>
        /// <returns>A list of new centroids, each with an empty document list.</returns>
        public static List <TestCentroid> CreateClusterSet(int clusterNumber)
        {
            var clusters = new List <TestCentroid>();

            for (int remaining = clusterNumber; remaining > 0; remaining--)
            {
                clusters.Add(new TestCentroid
                {
                    GroupedDocument = new List <DocumentVectorTest>()
                });
            }

            return clusters;
        }
        /// <summary>
        /// Creates a list, pre-sized to <paramref name="numClusters"/>, filled with
        /// that many empty centroids.
        /// </summary>
        /// <param name="numClusters">Number of centroids to create; also used as the list's initial capacity.</param>
        /// <returns>The list of new centroids, each with an empty document list.</returns>
        private static List <TestCentroid> TestCentroidInitializer(int numClusters)
        {
            var centroids = new List <TestCentroid>(numClusters);

            while (centroids.Count < numClusters)
            {
                var emptyCentroid = new TestCentroid
                {
                    GroupedDocument = new List <DocumentVectorTest>()
                };
                centroids.Add(emptyCentroid);
            }

            return centroids;
        }
예제 #7
0
        // Shared generator for centroid selection. A fresh Random per call can start
        // from the same time-based seed on .NET Framework when calls follow each
        // other quickly, which would repeat the same draw.
        private static readonly Random NextCentroidRandom = new Random();

        /// <summary>
        /// Selects the next centroid seed from <paramref name="vSpace"/> by weighted
        /// random choice, using the weights returned by
        /// <c>CalculateProbabilityArray_Test</c> for <paramref name="firstcentroid"/>.
        /// </summary>
        /// <param name="firstcentroid">Centroid the selection weights are computed against.</param>
        /// <param name="vSpace">Candidate documents; the list is not modified.</param>
        /// <returns>A new centroid whose document list holds exactly the selected document.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="vSpace"/> holds no documents.</exception>
        private static TestCentroid Calculate_Next_Centroid_Test(TestCentroid firstcentroid, List <DocumentVectorTest> vSpace)
        {
            float[] probabilities = CalculateProbabilityArray_Test(firstcentroid, vSpace);

            if (probabilities.Length == 0)
            {
                throw new ArgumentOutOfRangeException("vSpace", "vSpace must contain at least one document.");
            }

            double draw;
            int    fallbackIndex;

            // Random instances are not thread-safe, so guard the shared generator.
            lock (NextCentroidRandom)
            {
                draw          = NextCentroidRandom.NextDouble();
                fallbackIndex = NextCentroidRandom.Next(probabilities.Length);
            }

            // Roulette-wheel selection: walk the running total of the weights and
            // take the first document whose cumulative weight exceeds the draw.
            int    selectedIndex     = -1;
            int    lastWeightedIndex = -1;
            double cumulative        = 0.0;

            for (int i = 0; i < probabilities.Length; i++)
            {
                float weight = probabilities[i];

                // Skips zero weights and NaN (NaN fails every comparison).
                if (!(weight > 0))
                {
                    continue;
                }

                lastWeightedIndex = i;
                cumulative       += weight;

                if (draw < cumulative)
                {
                    selectedIndex = i;
                    break;
                }
            }

            if (selectedIndex < 0)
            {
                // Rounding can leave the total slightly below the draw: use the last
                // weighted document. With no usable weights at all, pick uniformly.
                selectedIndex = lastWeightedIndex >= 0 ? lastWeightedIndex : fallbackIndex;
            }

            TestCentroid next_centroid = new TestCentroid();
            next_centroid.GroupedDocument = new List <DocumentVectorTest>();
            next_centroid.GroupedDocument.Add(vSpace[selectedIndex]);
            return(next_centroid);
        }
예제 #8
0
        /// <summary>
        /// Chooses <paramref name="ClusterNumberPP"/> initial centroids: the first
        /// uniformly at random, each further one via
        /// <c>Calculate_Next_Centroid_Test</c> relative to the previously chosen centroid.
        /// </summary>
        /// <param name="dataPP">Documents to choose the centroids from; the list is not modified.</param>
        /// <param name="ClusterNumberPP">Number of centroids to return; must be between 1 and <c>dataPP.Count</c>.</param>
        /// <returns>
        /// The chosen centroids in selection order, starting with the randomly picked
        /// first centroid. Each holds exactly one document.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="dataPP"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="ClusterNumberPP"/> is below 1 or above the number of documents.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The candidates ran out before enough distinct documents were found (for
        /// example when <paramref name="dataPP"/> contains duplicate documents).
        /// </exception>
        public static List <TestCentroid> CentroidCalculationsForTestKMeansPP(List <DocumentVectorTest> dataPP, int ClusterNumberPP)
        {
            if (dataPP == null)
            {
                throw new ArgumentNullException("dataPP");
            }
            if (ClusterNumberPP < 1 || ClusterNumberPP > dataPP.Count)
            {
                // Outside this range the selection loop below could never finish.
                throw new ArgumentOutOfRangeException("ClusterNumberPP", "ClusterNumberPP must be between 1 and the number of documents.");
            }

            List <TestCentroid>       centroidListPP    = new List <TestCentroid>(ClusterNumberPP);
            List <DocumentVectorTest> candidates        = new List <DocumentVectorTest>(dataPP);
            List <DocumentVectorTest> existingCentroids = new List <DocumentVectorTest>(ClusterNumberPP);
            Random randomizerPP = new Random();

            int indexOfFirstElement = randomizerPP.Next(0, candidates.Count);
            DocumentVectorTest firstDocument = candidates[indexOfFirstElement];

            TestCentroid previousCentroid = new TestCentroid();
            previousCentroid.GroupedDocument = new List <DocumentVectorTest>();
            previousCentroid.GroupedDocument.Add(firstDocument);

            // The first centroid stays in the result, and its document leaves the
            // candidate pool so it cannot be selected a second time.
            centroidListPP.Add(previousCentroid);
            existingCentroids.Add(firstDocument);
            candidates.RemoveAt(indexOfFirstElement);

            while (centroidListPP.Count < ClusterNumberPP)
            {
                if (candidates.Count == 0)
                {
                    throw new InvalidOperationException("Not enough distinct documents to create the requested number of centroids.");
                }

                TestCentroid       newCentroid    = Calculate_Next_Centroid_Test(previousCentroid, candidates);
                DocumentVectorTest pickedDocument = newCentroid.GroupedDocument[0];

                // Always shrink the pool so every iteration makes progress.
                candidates.Remove(pickedDocument);

                if (existingCentroids.Contains(pickedDocument))
                {
                    continue;
                }

                existingCentroids.Add(pickedDocument);
                centroidListPP.Add(newCentroid);
                previousCentroid = newCentroid;
            }
            return(centroidListPP);
        }
예제 #9
0
        /// <summary>
        /// Groups documents into centroids according to a per-document cluster label.
        /// </summary>
        /// <param name="clusters">
        /// Cluster label of each document; entry <c>i</c> belongs to <c>data[i]</c>.
        /// Each label is also used as an index into <paramref name="data"/>.
        /// </param>
        /// <param name="alpha">Not used by this overload.</param>
        /// <param name="data">Documents to group.</param>
        /// <returns>
        /// One centroid per distinct label, in order of the label's first appearance.
        /// Each centroid's document list starts with <c>data[label]</c>, followed by
        /// every document carrying that label in document order.
        /// </returns>
        public static List <TestCentroid> GetClustersTest(int[] clusters, float alpha, List <DocumentVectorTest> data)
        {
            List <TestCentroid>            centroidSet     = new List <TestCentroid>();
            Dictionary <int, TestCentroid> centroidByLabel = new Dictionary <int, TestCentroid>();
            int N = data.Count;

            // Single pass with a label lookup, replacing repeated HashSet.ElementAt
            // scans inside nested loops.
            for (int i = 0; i < N; i++)
            {
                int label = clusters[i];
                TestCentroid centroid;

                if (!centroidByLabel.TryGetValue(label, out centroid))
                {
                    centroid = new TestCentroid
                    {
                        GroupedDocument = new List <DocumentVectorTest>()
                    };

                    // NOTE(review): the document at index `label` seeds the group, so it
                    // appears twice whenever clusters[label] == label — confirm whether
                    // that duplication is intended.
                    centroid.GroupedDocument.Add(data[label]);

                    centroidByLabel.Add(label, centroid);
                    centroidSet.Add(centroid);
                }

                centroid.GroupedDocument.Add(data[i]);
            }

            return(centroidSet);
        }