/// <summary>
/// Trains the self-organizing map and returns the node index assigned to each instance.
/// </summary>
public int[] Train(InstanceRepresentation set)
{
    set.Standardize();
    var instances = set.Instances.ToArray();
    _weights = new float[_gridDimensions[0] * _gridDimensions[1], set.FeauturesCount];
    FeaturesCount = set.FeauturesCount;

    // Initialize weights with Gaussian samples, unless k-means++ initialization
    // is requested and there are enough instances to draw the nodes from.
    var n = _gridDimensions[0] * _gridDimensions[1];
    if (!_usePlusPlusInit || n >= instances.Length)
    {
        for (ushort i = 0; i < n; i++)
        {
            for (ushort j = 0; j < FeaturesCount; j++)
            {
                _weights[i, j] = (float)GaussHelper.InvPhi(_random.NextDouble());
            }
        }
    }
    else
    {
        _weights = PlusPlusInitializer.InitializeCentroids(n, instances, _random);
    }

    for (var i = 0; i < _iterationsCount; i++)
    {
        var instance = instances[_random.Next(instances.Length)];

        // Best Matching Unit: the node whose weights are closest to the instance.
        var bmuIndex = Instances.MinEucDistanceIndex(instance, _weights);
        BMUCoordinates = ToCoordinates(bmuIndex);
        UpdateHexagonWeights((ushort)NeighbourhoodRadius(i), LearningRate(i), BMUCoordinates, instance.GetValues());
    }

    // Assign every instance to its nearest node.
    var instancesClusters = new int[instances.Length];
    for (var i = 0; i < instances.Length; i++)
    {
        instancesClusters[i] = Instances.MinEucDistanceIndex(instances[i], _weights);
    }

    return instancesClusters;
}
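// A hedged usage sketch for the trainer above. The SelfOrganizingMap class name,
// its constructor parameters, and the pre-built `set` are assumptions made for
// illustration; only Train(set) comes from this file:
//
//     var som = new SelfOrganizingMap(gridDimensions: new ushort[] { 10, 10 },
//                                     iterationsCount: 10000,
//                                     usePlusPlusInit: true);
//     int[] clusterPerInstance = som.Train(set); // nearest node of each instance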
/// <summary>
/// Trains the centroids.
/// </summary>
public void Train(InstanceRepresentation set)
{
    var instances = set.Instances.ToArray();

    // Initializing the centroids:
    if (_usePlusPlusInit)
    {
        _centroids = PlusPlusInitializer.InitializeCentroids(K, instances, _random);
    }
    else
    {
        _centroids = Instances.ConvertToArray(instances.SampleNoReplacement(K, _random));
    }

    _isInitialized = true;
    FeaturesCount = set.FeauturesCount;

    for (var i = 0; i < _iterationsCount; i++)
    {
        var miniBatch = instances.SampleReplacement(_minibatchSize, _random);
        MiniBatchUpdate(miniBatch, set.IsSparseDataset);
    }
}
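// Because MiniBatchUpdate (below) initializes the centroids itself on first use,
// batches can also be fed to it directly for streaming data, without calling
// Train at all. A hedged sketch; the kMeans variable and the source/ReadNextBatch
// helpers are illustrative assumptions, not part of this codebase:
//
//     while (source.HasMoreData)
//     {
//         IInstance[] batch = source.ReadNextBatch(batchSize);
//         kMeans.MiniBatchUpdate(batch, isSparseMiniBatch: false);
//     }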
public void MiniBatchUpdate(IInstance[] miniBatch, bool isSparseMiniBatch)
{
    var miniBatchSize = miniBatch.Length;

    if (!_isInitialized)
    {
        // Initializing the centroids:
        if (_usePlusPlusInit)
        {
            _centroids = PlusPlusInitializer.InitializeCentroids(K, miniBatch, _random);
        }
        else
        {
            // Draw K instances from the mini-batch to act as the initial centroids.
            _centroids = Instances.ConvertToArray(miniBatch.SampleReplacement(K, _random));
        }

        _isInitialized = true;
    }

    // Cache the nearest centroid of every mini-batch instance.
    var nearestClusters = new int[miniBatchSize];
    var perCenterCount = new int[K];
    for (var j = 0; j < miniBatchSize; j++)
    {
        nearestClusters[j] = Instances.MinEucDistanceIndex(miniBatch[j], _centroids);
    }

    // If the mini-batch is not sparse, we perform the dense cluster update.
    if (!isSparseMiniBatch)
    {
        // Move each centroid towards its assigned instances, with a per-center
        // learning rate of 1 / (number of instances assigned to it so far).
        for (var j = 0; j < miniBatchSize; j++)
        {
            perCenterCount[nearestClusters[j]] += 1;
            var learningRate = 1.0 / perCenterCount[nearestClusters[j]];
            for (var k = 0; k < FeaturesCount; k++)
            {
                var c = _centroids[nearestClusters[j], k];
                _centroids[nearestClusters[j], k] = (float)((1.0 - learningRate) * c + miniBatch[j].GetValue(k) * learningRate);
            }
        }
    }
    else // If the mini-batch is sparse, we perform the sparse clustering version.
    {
        for (var j = 0; j < miniBatchSize; j++)
        {
            var current = _centroids.L1Norm(nearestClusters[j], FeaturesCount);
            if (current <= Epsilon + Lambda)
            {
                // Centroid is already inside the (relaxed) L1 ball; nothing to project.
                continue;
            }

            // Bisect on the soft-threshold value theta until the thresholded
            // L1 norm falls within [Lambda, Lambda * (1 + Epsilon)].
            var upper = _centroids.Max(nearestClusters[j], FeaturesCount);
            var lower = 0.0;
            var theta = 0.0;
            while (current > Lambda * (Epsilon + 1) || current < Lambda)
            {
                theta = (upper + lower) / 2.0;

                // L1 norm of the centroid after soft-thresholding with theta.
                current = 0.0;
                for (var k = 0; k < FeaturesCount; k++)
                {
                    current += Math.Max(0, Math.Abs(_centroids[nearestClusters[j], k]) - theta);
                }

                if (current <= Lambda)
                {
                    upper = theta; // Thresholded too much; decrease theta.
                }
                else
                {
                    lower = theta; // Not enough; increase theta.
                }
            }

            // Apply the final soft-threshold to the centroid.
            for (var k = 0; k < FeaturesCount; k++)
            {
                var c = _centroids[nearestClusters[j], k];
                _centroids[nearestClusters[j], k] = (float)(Math.Sign(c) * Math.Max(0, Math.Abs(c) - theta));
            }
        }
    }
}
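// For reference, the sparse branch above approximately projects a centroid onto
// the L1 ball of radius Lambda: bisection finds the soft-threshold theta whose
// thresholded L1 norm lands in the tolerance band, then the threshold is applied.
// A standalone sketch of that final thresholding step over a plain float array
// (the helper name and signature are illustrative, not part of this class):
private static void SoftThreshold(float[] centroid, double theta)
{
    for (var k = 0; k < centroid.Length; k++)
    {
        // Shrink every component towards zero by theta; components with
        // magnitude below theta become exactly zero, which creates sparsity.
        centroid[k] = (float)(Math.Sign(centroid[k]) * Math.Max(0, Math.Abs(centroid[k]) - theta));
    }
}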