/// <summary>
/// Measures every document in <paramref name="docCollection"/> against
/// <paramref name="seedPoint"/> with KMeansPlus.GetEucliedeanDistance and stores the
/// results in <paramref name="docDetails"/>.
/// </summary>
/// <param name="docCollection">Documents to measure against the seed.</param>
/// <param name="seedPoint">Point the distances are measured from.</param>
/// <param name="docDetails">Object to populate; the populated value is returned.</param>
/// <returns>
/// <paramref name="docDetails"/> with SeedDocVect, Weights, Sum and MinD set.
/// Weights[i] always belongs to docCollection[i]; the seed's own slot stays 0.
/// MinD stays float.MaxValue when no document other than the seed was measured.
/// </returns>
private static DocDetails GetAllDetails(List <DocumentVector> docCollection, DocumentVector seedPoint, DocDetails docDetails)
        {
            float[] weights = new float[docCollection.Count];
            float   minD    = float.MaxValue;
            float   sum     = 0;

            // Indexed loop on purpose: callers sample an index from Weights and use it
            // directly as an index into docCollection, so slot i must stay aligned with
            // docCollection[i] even when the seed itself is skipped.
            for (int i = 0; i < docCollection.Count; i++)
            {
                DocumentVector point = docCollection[i];

                if (point == seedPoint)
                {
                    // Skipped entry: the slot keeps its default 0 and does not affect Sum or MinD.
                    continue;
                }

                weights[i] = KMeansPlus.GetEucliedeanDistance(point, seedPoint);
                sum       += weights[i];
                if (weights[i] < minD)
                {
                    minD = weights[i];
                }
            }

            docDetails.SeedDocVect = seedPoint;
            docDetails.Weights     = weights;
            docDetails.Sum         = sum;
            docDetails.MinD        = minD;

            return(docDetails);
        }
        /// <summary>
        /// Returns the entry of <paramref name="pds"/> whose MinD is smallest. When several
        /// entries share that smallest value, one of them is chosen through
        /// KMeansPlus.GetRandNumCrypto(0, count).
        /// </summary>
        /// <param name="pds">Candidates to compare; must contain at least one entry.</param>
        /// <returns>The selected entry.</returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="pds"/> is null or empty.
        /// </exception>
        public static DocDetails GetMinimalPointDistance(List <DocDetails> pds)
        {
            if (pds == null || pds.Count == 0)
            {
                throw new System.ArgumentException("At least one DocDetails entry is required.", "pds");
            }

            // The running minimum must start at the LARGEST float. Starting at
            // float.MinValue (the most negative value) means no MinD can ever be smaller,
            // which left the candidate list empty.
            float             minValue       = float.MaxValue;
            List <DocDetails> sameDistValues = new List <DocDetails>();

            foreach (DocDetails pd in pds)
            {
                if (pd.MinD < minValue)
                {
                    // Strictly better: discard the previous candidates.
                    sameDistValues.Clear();
                    minValue = pd.MinD;
                    sameDistValues.Add(pd);
                }
                else if (pd.MinD == minValue && !sameDistValues.Contains(pd))
                {
                    // Tie with the current minimum: keep it as an additional candidate.
                    sameDistValues.Add(pd);
                }
            }

            if (sameDistValues.Count > 1)
            {
                return(sameDistValues[KMeansPlus.GetRandNumCrypto(0, sameDistValues.Count)]);
            }

            return(sameDistValues[0]);
        }
        /// <summary>
        /// Samples an index of <paramref name="w"/>: walks the array accumulating w[i] / s and
        /// returns the first index at which the running total reaches the value drawn from
        /// KMeansPlus.GetRandNumCrypto().
        /// </summary>
        /// <param name="w">Weights to sample from; must contain at least one element.</param>
        /// <param name="s">Normalising divisor; callers pass the sum of <paramref name="w"/>.</param>
        /// <returns>
        /// An index inside <paramref name="w"/>. If the running total never reaches the drawn
        /// value, the last index with a positive weight is returned, or 0 when there is none.
        /// </returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="w"/> is null or empty.
        /// </exception>
        private static int GetWeightedProbDist(float[] w, float s)
        {
            if (w == null || w.Length == 0)
            {
                throw new System.ArgumentException("At least one weight is required.", "w");
            }

            // NOTE(review): presumably a value in [0, 1) - confirm against KMeansPlus.
            float p            = KMeansPlus.GetRandNumCrypto();
            float q            = 0;
            int   lastPositive = -1;

            // Bounded loop: the unbounded "while (q < p)" walked past the end of the array
            // when float rounding kept the total just below p, and returned -1 when p was 0.
            for (int i = 0; i < w.Length; i++)
            {
                if (!(w[i] > 0))
                {
                    // A non-positive weight adds nothing to the running total, so it can
                    // never be the index at which the total first reaches p.
                    continue;
                }

                lastPositive = i;
                q           += (w[i] / s);
                if (q >= p)
                {
                    return(i);
                }
            }

            // Total never reached p (rounding, or no positive weight at all).
            return(lastPositive >= 0 ? lastPositive : 0);
        }
        /// <summary>
        /// Chooses seed points from <paramref name="docCollection"/>. The first seed comes from
        /// KMeansPlus.GenerateRandomNumber; each further seed is sampled with
        /// GetWeightedProbDist from the weights of a previously chosen seed.
        /// </summary>
        /// <param name="docCollection">Documents to pick seeds from; indexed directly, so it must not be empty.</param>
        /// <param name="k">Number of seeds wanted; also used as the initial list capacity.</param>
        /// <returns>
        /// The chosen seeds: the first seed plus one per loop iteration (k - 1 iterations).
        /// </returns>
        public static List <DocumentVector> GetSeedPoints2v(List <DocumentVector> docCollection, int k)
        {
            List <DocumentVector> seedPoints     = new List <DocumentVector>(k);
            List <DocDetails>     docDetailsList = new List <DocDetails>();

            int            firstIndex = KMeansPlus.GenerateRandomNumber(0, docCollection.Count);
            DocumentVector firstPoint = docCollection[firstIndex];

            seedPoints.Add(firstPoint);

            for (int i = 0; i < k - 1; i++)
            {
                DocDetails source;

                if (seedPoints.Count >= 2)
                {
                    // Later seeds: sample from the recorded details with the smallest MinD.
                    source = GetMinimalPointDistance(docDetailsList);
                }
                else
                {
                    // Second seed: sample from the distances to the first seed.
                    source = GetAllDetails(docCollection, firstPoint, new DocDetails());
                    docDetailsList.Add(source);
                }

                int            index     = GetWeightedProbDist(source.Weights, source.Sum);
                DocumentVector nextPoint = docCollection[index];

                // Every sampled point must be recorded as a seed. Previously only the second
                // point was added, so the result never held more than two seeds.
                // NOTE(review): nothing here prevents nextPoint from repeating an earlier seed.
                seedPoints.Add(nextPoint);

                docDetailsList.Add(GetAllDetails(docCollection, nextPoint, new DocDetails()));
            }

            return(seedPoints);
        }
// Example no. 5
// 0
/*
 *      public static ClusterPoint GetKMeansPP(List<DocumentVector> allPoints, int k)
 *      {
 *          List<DocumentVector> seedPoints = AdditionalFunctionalityForSecondKMeansppimplementation.GetSeedPoints2v(allPoints, k);
 *
 *          //ClusterPoint result = KMeansCalculation(allPoints, seedPoints, k);
 *
 *          //return result;
 *      }
 */
        /// <summary>
        /// Unfinished clustering step. For every document in <paramref name="docList"/> it finds
        /// the seed(s) with the smallest KMeansPlus.GetEucliedeanDistance, breaking ties through
        /// KMeansPlus.GetRandNumCrypto, but the selected seed is not yet used for anything.
        /// </summary>
        /// <param name="docList">Documents to process.</param>
        /// <param name="seedPoints">Candidate centres; must not be empty when docList has entries.</param>
        /// <param name="k">Currently unused; kept so the signature stays unchanged.</param>
        /// <returns>A newly constructed DocumentVector that this method never modifies.</returns>
        private static DocumentVector KMeansCalculation(List <DocumentVector> docList, List <DocumentVector> seedPoints, int k)
        {
            DocumentVector        cluster      = new DocumentVector();
            List <DocumentVector> nearestSeeds = new List <DocumentVector>();

            foreach (DocumentVector vector in docList)
            {
                // Fresh search state for each document.
                float minD = float.MaxValue;
                nearestSeeds.Clear();

                foreach (DocumentVector seedPoint in seedPoints)
                {
                    float dist = KMeansPlus.GetEucliedeanDistance(vector, seedPoint);
                    if (dist < minD)
                    {
                        // Strictly closer seed: it replaces all previous candidates.
                        nearestSeeds.Clear();
                        minD = dist;
                        nearestSeeds.Add(seedPoint);
                    }
                    else if (dist == minD && !nearestSeeds.Contains(seedPoint))
                    {
                        // Equally close seed: keep it for the tie-break below.
                        nearestSeeds.Add(seedPoint);
                    }
                }

                DocumentVector keyPoint;
                if (nearestSeeds.Count > 1)
                {
                    int index = KMeansPlus.GetRandNumCrypto(0, nearestSeeds.Count);
                    keyPoint = nearestSeeds[index];
                }
                else
                {
                    keyPoint = nearestSeeds[0];
                }

                // TODO: assign the document to keyPoint's cluster. The earlier draft, kept for
                // reference, was:
                //
                //   if (!cluster.ClustersPoint.ContainsKey(keyPoint))  // new cluster
                //   {
                //       List<Point> newCluster = new List<Point>();
                //       newCluster.Add(p);
                //       cluster.PC.Add(keyPoint, newCluster);
                //   }
                //   else
                //   {   // existing cluster centre
                //       cluster.PC[keyPoint].Add(p);
                //   }
            }

            // The former "exit" flag was always true, so the recursive branch it guarded could
            // never run - and it would have recursed forever with identical arguments if it had.
            // TODO: add a real convergence loop once the assignment step exists.
            return(cluster);
        }