/// <summary>
/// Regroups the rows of a clustered data view into one array of rows per cluster.
/// </summary>
/// <param name="dataView">
/// Clustered data view; <c>Clustered[i]</c> is read as the cluster number of <c>Rows[i]</c>.
/// </param>
/// <returns>
/// Jagged array whose first index is the cluster number and whose second index
/// enumerates the rows of that cluster in the order they occur in <c>dataView.Rows</c>.
/// </returns>
private Row[][] Init2DimensionArrayClusteredRow(IDataViewClustered dataView)
{
    int totalRows = dataView.Rows.Length;

    // Number of objects assigned to every distinct cluster number.
    var clusterSizes = dataView.Clustered
        .GroupBy(label => label)
        .Select(group => new { Label = group.Key, Size = group.Count() })
        .ToList();
    int clusterCount = clusterSizes.Count;

    // Cluster numbers are used directly as indices into the outer array,
    // which has exactly one slot per distinct cluster number.
    Row[][] grouped = new Row[clusterCount][];
    foreach (var entry in clusterSizes)
    {
        grouped[entry.Label] = new Row[entry.Size];
    }

    // nextFreeSlot[c] is the position the next row of cluster c is written to.
    int[] nextFreeSlot = new int[clusterCount];
    for (int rowIndex = 0; rowIndex < totalRows; rowIndex++)
    {
        int label = dataView.Clustered[rowIndex];
        int slot = nextFreeSlot[label];
        grouped[label][slot] = dataView.Rows[rowIndex];
        nextFreeSlot[label] = slot + 1;
    }

    return grouped;
}
/// <summary>
/// Counts the pairs of objects that share a cluster: the sum, over all clusters,
/// of N * (N - 1) / 2 where N is the number of objects in the cluster.
/// </summary>
/// <param name="dataView">Clustered data view; only <c>Clustered</c> is read.</param>
/// <returns>The total number of within-cluster pairs.</returns>
/// <exception cref="System.OverflowException">
/// The number of pairs does not fit in a 32-bit signed integer.
/// </exception>
private int CalculateValueR(IDataViewClustered dataView)
{
    // Accumulate in 64 bits: N * (N - 1) exceeds int.MaxValue for a cluster of
    // roughly 46k objects even though the halved total may still fit in an int.
    long doubledPairCount = 0;
    foreach (var cluster in dataView.Clustered.GroupBy(c => c))
    {
        long size = cluster.Count();
        doubledPairCount += size * (size - 1);
    }

    // Every N * (N - 1) term is even, so the division is exact. The checked cast
    // raises OverflowException instead of silently wrapping to a wrong value.
    return checked((int)(doubledPairCount / 2));
}
/// <summary>
/// Computes a quality score for a clustering as (D - Dmin) / (Dmax - Dmin).
/// </summary>
/// <remarks>
/// D is the value returned by <c>CalculateSumOfDistancesForEachCluster</c> for the
/// per-cluster distances. Dmin and Dmax are obtained from
/// <c>CalculateSumOfDistanceFromDirection</c> over the sorted list of all distances,
/// called with <c>true</c> and <c>false</c> respectively and with the count R from
/// <c>CalculateValueR</c> (presumably the R smallest and R largest distances; confirm
/// against that helper). When Dmax equals Dmin the division yields a non-finite value.
/// </remarks>
/// <param name="dataView">Clustered data view to evaluate.</param>
/// <param name="calculationDistance">Distance measure passed to the distance helpers.</param>
/// <returns>The computed score.</returns>
public float EvaluateQuality(IDataViewClustered dataView, ICalculationDistance calculationDistance)
{
    // Distances inside the clusters.
    Row[][] rowsByCluster = Init2DimensionArrayClusteredRow(dataView);
    List<DistanceClustered> clusterDistances = CalculateDistancesByClusters(rowsByCluster, calculationDistance);
    float withinSum = CalculateSumOfDistancesForEachCluster(clusterDistances);

    int pairCount = CalculateValueR(dataView);

    // Distances over the whole data set, sorted so that sums can be taken from either end.
    Distance[] sortedDistances = CalculateDistanceForEach(dataView.Rows, calculationDistance);
    Array.Sort(sortedDistances, new DistanceComparer<Distance>());

    float lowerSum = CalculateSumOfDistanceFromDirection(true, pairCount, sortedDistances);
    float upperSum = CalculateSumOfDistanceFromDirection(false, pairCount, sortedDistances);

    float quality = (withinSum - lowerSum) / (upperSum - lowerSum);
    return quality;
}
/// <summary>
/// Computes the centroid of every cluster as the per-column arithmetic mean of the rows assigned to it.
/// </summary>
/// <param name="dataViewClustered">
/// Clustered data view; <c>Clustered[i]</c> is read as the cluster number of <c>Rows[i]</c>.
/// </param>
/// <returns>
/// One centroid per distinct cluster number; the centroid of cluster <c>c</c> is stored at index <c>c</c>.
/// </returns>
public Row[] CalculateCentroids(IDataViewClustered dataViewClustered)
{
    var distinctLabels = dataViewClustered.Clustered.Distinct();
    int rowCount = dataViewClustered.Rows.Length;
    int columnCount = dataViewClustered.Columns.Length;
    int clusterCount = distinctLabels.Count();

    // Start every centroid as a zero vector with one component per column.
    Row[] centroids = new Row[clusterCount];
    for (int cluster = 0; cluster < clusterCount; cluster++)
    {
        centroids[cluster] = new Row { Rows = new float[columnCount] };
    }

    // Sum the rows of each cluster component-wise; the cluster number is the centroid index.
    for (int row = 0; row < rowCount; row++)
    {
        int label = dataViewClustered.Clustered[row];
        for (int column = 0; column < columnCount; column++)
        {
            centroids[label].Rows[column] += dataViewClustered.Rows[row].Rows[column];
        }
    }

    // Number of objects per cluster number, used to turn the sums into means.
    var sizeByLabel = dataViewClustered.Clustered
        .GroupBy(label => label)
        .ToDictionary(group => group.Key, group => group.Count());

    for (int cluster = 0; cluster < centroids.Length; cluster++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            centroids[cluster].Rows[column] /= sizeByLabel[cluster];
        }
    }

    return centroids;
}
/// <summary>
/// Clusters <paramref name="data"/> by repeatedly recomputing centroids and rebuilding the
/// clustered view until two consecutive centroid sets compare equal.
/// </summary>
/// <remarks>
/// The initial centroids come from <c>MethodInitialization.InitStartCentroidsPositions</c>
/// with <c>NumberOfClusters</c>. There is no iteration limit: the loop ends only when
/// <c>SequenceEqual</c> with a <c>RowComparer</c> reports the previous and the newly
/// computed centroids as equal.
/// </remarks>
/// <param name="data">Data view to cluster.</param>
/// <returns>The clustered view built from the last centroid set that was applied.</returns>
public IDataViewClustered FitPredict(IDataView data)
{
    Row[] previousCentroids = MethodInitialization.InitStartCentroidsPositions(data, NumberOfClusters);
    IDataViewClustered clustered = InitDataViewClustered(data, previousCentroids);

    while (true)
    {
        Row[] recomputed = MethodInitialization.CalculateCentroids(clustered);

        // Centroids did not move: the current assignment is final.
        if (previousCentroids.SequenceEqual(recomputed, new RowComparer()))
        {
            return clustered;
        }

        clustered = InitDataViewClustered(data, recomputed);
        previousCentroids = (Row[])recomputed.Clone();
    }
}