Пример #1
0
        /// <summary>
        /// Fits the centroids to the given data set: seeds them from the set's
        /// instances and then runs <c>_iterationsCount</c> mini-batch updates.
        /// </summary>
        /// <param name="set">
        /// Data set that supplies the training instances, the feature count and
        /// the flag telling whether the data is sparse.
        /// </param>
        public void Train(InstanceRepresentation set)
        {
            var trainingData = set.Instances.ToArray();

            // Seed the centroids, either from K instances drawn with
            // SampleNoReplacement or through PlusPlusInitializer.
            if (!_usePlusPlusInit)
            {
                var seeds = trainingData.SampleNoReplacement(K, _random);
                _centroids = Instances.ConvertToArray(seeds);
            }
            else
            {
                _centroids = PlusPlusInitializer.InitializeCentroids(K, trainingData, _random);
            }

            _isInitialized = true;
            FeaturesCount  = set.FeauturesCount;

            // Each round draws a fresh mini-batch with SampleReplacement and
            // lets MiniBatchUpdate move the centroids.
            var round = 0;
            while (round < _iterationsCount)
            {
                var batch = trainingData.SampleReplacement(_minibatchSize, _random);
                MiniBatchUpdate(batch, set.IsSparseDataset);
                round++;
            }
        }
Пример #2
0
        /// <summary>
        /// Applies one mini-batch k-means step: assigns every instance of the
        /// batch to its nearest centroid, moves each centroid toward the
        /// instances assigned to it and, for sparse data, shrinks every touched
        /// centroid back onto the L1 ball of radius <c>Lambda</c>.
        /// </summary>
        /// <remarks>
        /// If the centroids have not been initialized yet they are seeded from
        /// <paramref name="miniBatch"/> first.
        /// NOTE(review): <c>FeaturesCount</c> is not assigned here; presumably
        /// the caller sets it before the first call - confirm.
        /// NOTE(review): the per-centroid counts are local to this call, so the
        /// learning rate restarts at 1 for every mini-batch; the reference
        /// algorithm keeps the counts across batches - confirm this is intended.
        /// </remarks>
        /// <param name="miniBatch">Instances used for this update step.</param>
        /// <param name="isSparseMiniBatch">
        /// When true, the L1 projection is applied after the centroid update.
        /// </param>
        public void MiniBatchUpdate(IInstance[] miniBatch, bool isSparseMiniBatch)
        {
            var batchLength = miniBatch.Length;

            if (!_isInitialized)
            {
                // Seed K centroids from the first mini-batch seen.
                if (_usePlusPlusInit)
                {
                    _centroids = PlusPlusInitializer.InitializeCentroids(K, miniBatch, _random);
                }
                else
                {
                    // K (the number of clusters), not FeaturesCount, is the
                    // number of seed instances to draw.
                    _centroids = Instances.ConvertToArray(
                        miniBatch.SampleReplacement(K, _random));
                }

                _isInitialized = true;
            }

            // Sized from the actual batch, which need not equal _minibatchSize.
            var nearestClusters = new int[batchLength];
            var perCenterCount  = new int[_centroids.Length];

            // All assignments are computed before any centroid is moved.
            for (var j = 0; j < batchLength; j++)
            {
                nearestClusters[j] = Instances.MinEucDistanceIndex(miniBatch[j], _centroids);
            }

            // Gradient step, shared by the dense and the sparse variant: each
            // instance pulls its centroid with a per-centroid rate of 1 / count.
            for (var j = 0; j < batchLength; j++)
            {
                var cluster = nearestClusters[j];

                perCenterCount[cluster] += 1;
                var learningRate = 1.0 / perCenterCount[cluster];

                for (var k = 0; k < FeaturesCount; k++)
                {
                    var c = _centroids[cluster, k];
                    _centroids[cluster, k] = (float)((1.0 - learningRate) * c + miniBatch[j].GetValue(k) * learningRate);
                }
            }

            if (!isSparseMiniBatch)
            {
                return;
            }

            // Sparse variant: project every centroid that was touched by this
            // batch, once, after all of its updates have been applied.
            for (var cluster = 0; cluster < perCenterCount.Length; cluster++)
            {
                if (perCenterCount[cluster] > 0)
                {
                    ProjectOntoL1Ball(cluster);
                }
            }
        }

        /// <summary>
        /// Soft-thresholds one centroid so that its L1 norm falls between
        /// <c>Lambda</c> and <c>Lambda * (1 + Epsilon)</c>, searching for the
        /// threshold by bisection. Centroids whose norm is already at most
        /// <c>Lambda + Epsilon</c> are left untouched.
        /// </summary>
        /// <param name="cluster">Row index of the centroid to project.</param>
        private void ProjectOntoL1Ball(int cluster)
        {
            var current = _centroids.L1Norm(cluster, FeaturesCount);

            if (current <= Epsilon + Lambda)
            {
                // Already inside the ball (within tolerance): nothing to do
                // for this centroid; other centroids are still processed.
                return;
            }

            var upper = _centroids.Max(cluster, FeaturesCount);
            var lower = 0.0;
            var theta = 0.0;

            // Bisect on the threshold until the shrunken norm lands in
            // [Lambda, Lambda * (1 + Epsilon)].
            while (current > Lambda * (Epsilon + 1) || current < Lambda)
            {
                var middle = (upper + lower) / 2.0;

                if (middle <= lower || middle >= upper)
                {
                    // The interval can no longer be halved; stop instead of
                    // looping forever and keep the last usable threshold.
                    break;
                }

                theta = middle;

                // L1 norm the centroid would have after shrinking by theta.
                current = 0.0;
                for (var k = 0; k < FeaturesCount; k++)
                {
                    current += Math.Max(0, Math.Abs(_centroids[cluster, k]) - theta);
                }

                // The bounds move only once the full sum is known.
                if (current <= Lambda)
                {
                    upper = theta;
                }
                else
                {
                    lower = theta;
                }
            }

            // Apply the soft threshold: shrink magnitudes by theta, keep signs.
            for (var k = 0; k < FeaturesCount; k++)
            {
                var c = _centroids[cluster, k];
                _centroids[cluster, k] = (float)(Math.Sign(c) * Math.Max(0, Math.Abs(c) - theta));
            }
        }