Пример #1
0
        /// <summary>
        /// Regroups the rows of <paramref name="dataView"/> into a jagged array with one inner array per cluster.
        /// </summary>
        /// <remarks>
        /// Cluster labels are used directly as indices into arrays whose length is the number of distinct
        /// labels, so the labels have to be exactly 0..k-1; any other label makes an index go out of range.
        /// </remarks>
        /// <param name="dataView">Source of the rows (Rows) and of the cluster label of each row (Clustered).</param>
        /// <returns>
        /// Jagged array whose first index is the cluster label and whose inner array holds the rows
        /// carrying that label, in the order they appear in <paramref name="dataView"/>.
        /// </returns>
        private Row[][] Init2DimensionArrayClusteredRow(IDataViewClustered dataView)
        {
            int totalRows = dataView.Rows.Length;

            // One entry per distinct label together with the number of rows carrying it.
            var clusterSizes = dataView.Clustered
                               .GroupBy(label => label)
                               .Select(group => new { Label = group.Key, Size = group.Count() })
                               .ToList();

            Row[][] rowsByCluster = new Row[clusterSizes.Count][];

            foreach (var clusterSize in clusterSizes)
            {
                rowsByCluster[clusterSize.Label] = new Row[clusterSize.Size];
            }

            // nextFreeSlot[label] is the position the next row of that cluster is written to.
            int[] nextFreeSlot = new int[clusterSizes.Count];

            for (int rowIndex = 0; rowIndex < totalRows; rowIndex++)
            {
                int label = dataView.Clustered[rowIndex];
                int slot  = nextFreeSlot[label];

                rowsByCluster[label][slot] = dataView.Rows[rowIndex];
                nextFreeSlot[label]        = slot + 1;
            }

            return rowsByCluster;
        }
Пример #2
0
        /// <summary>
        /// Counts the pairs of objects that share a cluster: the sum over all clusters of N * (N - 1) / 2,
        /// where N is the number of entries of dataView.Clustered carrying that cluster's label.
        /// </summary>
        /// <param name="dataView">Clustered view; only its Clustered labels are read.</param>
        /// <returns>The total number of within-cluster pairs.</returns>
        /// <exception cref="OverflowException">The pair count does not fit in an Int32.</exception>
        private int CalculateValueR(IDataViewClustered dataView)
        {
            // Accumulate in 64 bits: N * (N - 1) exceeds Int32 for N > 46341, and the running sum is
            // twice the final result, so an int accumulator wraps even when the result itself would fit.
            long doubledPairCount = 0;

            foreach (var cluster in dataView.Clustered.GroupBy(c => c))
            {
                long size = cluster.Count();
                doubledPairCount += size * (size - 1);
            }

            // N * (N - 1) is always even, so the division is exact.
            // checked: fail loudly instead of returning a wrapped (possibly negative) count.
            return checked((int)(doubledPairCount / 2));
        }
Пример #3
0
        /// <summary>
        /// Computes the normalised score (D - Dmin) / (Dmax - Dmin) for a clustering.
        /// </summary>
        /// <remarks>
        /// D comes from CalculateSumOfDistancesForEachCluster over the per-cluster distances, R from
        /// CalculateValueR, and Dmin / Dmax from CalculateSumOfDistanceFromDirection over the sorted
        /// distances of all rows (called with true and false respectively).
        /// NOTE(review): this has the shape of the C-index cluster validity measure, which would make
        /// Dmin / Dmax the sums of the R smallest / largest pairwise distances - confirm against the helpers.
        /// The result is not finite (NaN or infinity) when Dmax equals Dmin.
        /// </remarks>
        /// <param name="dataView">Rows together with their cluster labels.</param>
        /// <param name="calculationDistance">Distance calculation handed to the distance helpers.</param>
        /// <returns>The value (D - Dmin) / (Dmax - Dmin).</returns>
        public float EvaluateQuality(IDataViewClustered dataView, ICalculationDistance calculationDistance)
        {
            // Distances computed cluster by cluster, and their total.
            Row[][] rowsByCluster = Init2DimensionArrayClusteredRow(dataView);
            List<DistanceClustered> clusterDistances = CalculateDistancesByClusters(rowsByCluster, calculationDistance);
            float withinClusterSum = CalculateSumOfDistancesForEachCluster(clusterDistances);

            int pairCount = CalculateValueR(dataView);

            // Distances over all rows, sorted so both bounds can be taken from the same array.
            Distance[] sortedDistances = CalculateDistanceForEach(dataView.Rows, calculationDistance);
            Array.Sort(sortedDistances, new DistanceComparer<Distance>());

            float lowerBound = CalculateSumOfDistanceFromDirection(true, pairCount, sortedDistances);
            float upperBound = CalculateSumOfDistanceFromDirection(false, pairCount, sortedDistances);

            return (withinClusterSum - lowerBound) / (upperBound - lowerBound);
        }
        /// <summary>
        /// Computes the centroid of every cluster as the per-column mean of the rows assigned to it.
        /// </summary>
        /// <remarks>
        /// Cluster labels are used directly as indices into the centroid array, whose length is the number
        /// of distinct labels, so the labels have to be exactly 0..k-1.
        /// </remarks>
        /// <param name="dataViewClustered">Rows, columns and the cluster label of each row.</param>
        /// <returns>One centroid per distinct label; the array index is the cluster label.</returns>
        public Row[] CalculateCentroids(IDataViewClustered dataViewClustered)
        {
            // Number of rows carrying each label; its entry count is the number of distinct labels.
            var membersPerCluster = dataViewClustered.Clustered
                                    .GroupBy(label => label)
                                    .ToDictionary(group => group.Key, group => group.Count());

            int rowCount     = dataViewClustered.Rows.Length;
            int columnCount  = dataViewClustered.Columns.Length;
            int clusterCount = membersPerCluster.Count;

            // Start every centroid as a zero vector with one component per column.
            Row[] centroids = Enumerable.Range(0, clusterCount)
                              .Select(_ => new Row { Rows = new float[columnCount] })
                              .ToArray();

            // Sum the rows of each cluster component by component.
            for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
            {
                int label = dataViewClustered.Clustered[rowIndex];
                for (int column = 0; column < columnCount; column++)
                {
                    centroids[label].Rows[column] += dataViewClustered.Rows[rowIndex].Rows[column];
                }
            }

            // Turn the sums into means.
            for (int label = 0; label < centroids.Length; label++)
            {
                for (int column = 0; column < columnCount; column++)
                {
                    centroids[label].Rows[column] /= membersPerCluster[label];
                }
            }

            return centroids;
        }
Пример #5
0
        /// <summary>
        /// Clusters <paramref name="data"/> by alternating centroid recomputation and re-initialisation of
        /// the clustered view until the recomputed centroids equal the previous ones under RowComparer.
        /// </summary>
        /// <remarks>
        /// There is no iteration limit: the loop only ends once two consecutive centroid sets compare equal.
        /// </remarks>
        /// <param name="data">Data to cluster.</param>
        /// <returns>The clustered view produced with the last accepted set of centroids.</returns>
        public IDataViewClustered FitPredict(IDataView data)
        {
            Row[] currentCentroids = MethodInitialization.InitStartCentroidsPositions(data, NumberOfClusters);
            IDataViewClustered clustered = InitDataViewClustered(data, currentCentroids);

            while (true)
            {
                Row[] recomputedCentroids = MethodInitialization.CalculateCentroids(clustered);

                // Unchanged centroids mean the clustering has settled.
                if (currentCentroids.SequenceEqual(recomputedCentroids, new RowComparer()))
                {
                    return clustered;
                }

                clustered        = InitDataViewClustered(data, recomputedCentroids);
                currentCentroids = (Row[])recomputedCentroids.Clone();
            }
        }