Ejemplo n.º 1
0
        /// <summary>
        /// Trains the self-organizing map on <paramref name="set"/> and returns,
        /// for each instance, the index of the map unit (cluster) it ends up closest to.
        /// </summary>
        /// <param name="set">The dataset; it is standardized in place before training.</param>
        /// <returns>One cluster index per instance, in the order of <c>set.Instances</c>.</returns>
        public int[] Train(InstanceRepresentation set)
        {
            set.Standardize();
            var instances = set.Instances.ToArray();

            FeaturesCount = set.FeauturesCount;

            // Total number of units in the 2-D grid.
            var n = _gridDimensions[0] * _gridDimensions[1];

            // Initialize weights: k-means++-style seeding when requested and there are
            // enough distinct instances; otherwise draw each weight from a Gaussian.
            // (Allocating only in the Gaussian branch avoids the throw-away array the
            // ++-init path would otherwise overwrite.)
            if (_usePlusPlusInit && n < instances.Length)
            {
                _weights = PlusPlusInitializer.InitializeCentroids(n, instances, _random);
            }
            else
            {
                _weights = new float[n, FeaturesCount];

                // int counters: ushort counters would wrap (and loop forever) for
                // grids or feature counts larger than 65535.
                for (var i = 0; i < n; i++)
                {
                    for (var j = 0; j < FeaturesCount; j++)
                    {
                        _weights[i, j] = (float)GaussHelper.InvPhi(_random.NextDouble());
                    }
                }
            }

            // Stochastic training: pick a random instance, find its Best Matching Unit,
            // and pull the BMU's neighbourhood towards the instance. Radius and learning
            // rate both decay with the iteration number.
            for (var i = 0; i < _iterationsCount; i++)
            {
                var instance = instances[_random.Next(instances.Length)];
                var bmuIndex = Instances.MinEucDistanceIndex(instance, _weights);
                BMUCoordinates = ToCoordinates(bmuIndex);
                UpdateHexagonWeights((ushort)NeighbourhoodRadius(i), LearningRate(i), BMUCoordinates, instance.GetValues());
            }

            // Final assignment: each instance is labelled with its nearest unit.
            var instancesClusters = new int[instances.Length];

            for (var i = 0; i < instances.Length; i++)
            {
                instancesClusters[i] = Instances.MinEucDistanceIndex(instances[i], _weights);
            }

            return(instancesClusters);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains the centroids on the given dataset using mini-batch updates.
        /// </summary>
        /// <param name="set">The dataset to cluster.</param>
        public void Train(InstanceRepresentation set)
        {
            var instances = set.Instances.ToArray();

            // Seed the K centroids: k-means++ seeding when enabled, otherwise a
            // uniform sample of K distinct instances.
            _centroids = _usePlusPlusInit
                ? PlusPlusInitializer.InitializeCentroids(K, instances, _random)
                : Instances.ConvertToArray(instances.SampleNoReplacement(K, _random));

            _isInitialized = true;
            FeaturesCount  = set.FeauturesCount;

            // Each iteration draws a fresh mini-batch (with replacement) and applies
            // one round of centroid updates.
            for (var iteration = 0; iteration < _iterationsCount; iteration++)
            {
                var miniBatch = instances.SampleReplacement(_minibatchSize, _random);
                MiniBatchUpdate(miniBatch, set.IsSparseDataset);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Performs one round of mini-batch k-means centroid updates.
        /// Lazily initializes the centroids on the first call if <see cref="Train"/>
        /// has not been invoked.
        /// </summary>
        /// <param name="miniBatch">The batch of instances to update the centroids with.</param>
        /// <param name="isSparseMiniBatch">
        /// When true, applies the sparse (L1-projection) update variant instead of the
        /// dense per-feature gradient step.
        /// </param>
        public void MiniBatchUpdate(IInstance[] miniBatch, bool isSparseMiniBatch)
        {
            var miniBatchSize = miniBatch.Length;

            if (!_isInitialized)
            {
                // Initializing the centroids:
                if (_usePlusPlusInit)
                {
                    _centroids = PlusPlusInitializer.InitializeCentroids(K, miniBatch, _random);
                }
                else
                {
                    // Sample K centroids (was FeaturesCount — a bug; the centroid count
                    // must match K, as in Train and the ++-init branch above).
                    _centroids = Instances.ConvertToArray(
                        miniBatch.SampleReplacement(K, _random));
                }

                _isInitialized = true;
            }

            // Size by the actual batch length, not the configured _minibatchSize:
            // a caller-supplied batch larger than _minibatchSize previously caused an
            // IndexOutOfRangeException in the loops below.
            var nearestClusters = new int[miniBatchSize];

            // GetLength(0) is the number of centroids (rows); Length on a 2-D array is
            // rows * columns and over-allocated the counter array.
            var perCenterCount  = new int[_centroids.GetLength(0)];

            // Assign every batch instance to its nearest centroid.
            for (var j = 0; j < miniBatchSize; j++)
            {
                nearestClusters[j] = Instances.MinEucDistanceIndex(miniBatch[j], _centroids);
            }

            // If the dataset is not sparse we perform the non sparse cluster computation.
            if (!isSparseMiniBatch)
            {
                for (var j = 0; j < miniBatchSize; j++)
                {
                    // Per-center learning rate 1/count decays as a center accumulates points.
                    perCenterCount[nearestClusters[j]] += 1;
                    var learningRate = 1.0 / perCenterCount[nearestClusters[j]];

                    for (var k = 0; k < FeaturesCount; k++)
                    {
                        var c = _centroids[nearestClusters[j], k];
                        _centroids[nearestClusters[j], k] = (float)((1.0 - learningRate) * c + miniBatch[j].GetValue(k) * learningRate);
                    }
                }
            }
            else // If the dataset is sparse we perform the sparse clustering version.
            {
                // Soft-threshold each updated center so its L1 norm shrinks towards Lambda.
                for (var j = 0; j < miniBatchSize; j++)
                {
                    var current = _centroids.L1Norm(nearestClusters[j], FeaturesCount);

                    // NOTE(review): `break` abandons the remaining batch instances as soon
                    // as one center's L1 norm is already small; `continue` (skip just this
                    // center) looks intended — confirm against the sparse k-means reference.
                    if (current <= Epsilon + Lambda)
                    {
                        break;
                    }

                    var upper = _centroids.Max(nearestClusters[j], FeaturesCount);
                    var lower = 0.0;
                    var theta = 0.0;

                    // Bisection search for the soft-threshold theta.
                    while (current < Lambda * (Epsilon + 1) || current < Lambda)
                    {
                        theta   = (upper + lower) / 2.0; // Get L1 value
                        current = 0.0;
                        for (var k = 0; k < FeaturesCount; k++)
                        {
                            current += Math.Max(0, Math.Abs(_centroids[nearestClusters[j], k]) - theta);
                            // NOTE(review): the bisection bounds are adjusted on a PARTIAL
                            // sum inside this loop; updating upper/lower only after the
                            // full norm is computed looks intended — verify before changing.
                            if (current <= Lambda)
                            {
                                upper = theta;
                            }
                            else
                            {
                                lower = theta;
                            }
                        }
                    }

                    // Apply the soft threshold: shrink each coordinate by theta, clamped at 0.
                    for (var k = 0; k < FeaturesCount; k++)
                    {
                        var c = _centroids[nearestClusters[j], k];
                        _centroids[nearestClusters[j], k] = (float)(Math.Sign(c) * Math.Max(0, Math.Abs(c) - theta));
                    }
                }
            }
        }