/// <summary>
/// Smoke test: trains a <c>MiniBatchClustering</c> on four sparse, all-ordinal
/// instances (two identical pairs) and then clusters the same set.
/// Nothing is asserted; the test passes as long as no exception is thrown.
/// </summary>
public void sparse_features_simple()
        {
            // Every one of the four features is declared ordinal.
            var featureTypes = new[]
            {
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal
            };

            var sparseSet = new InstanceRepresentation(featureTypes, sparse: true);

            // Two identical pairs of observations, added in their original order.
            var observations = new[]
            {
                new[] { 20f, 30f, 0f, 0f },
                new[] { 20f, 30f, 0f, 0f },
                new[] { 0f, 0f, 10f, 20f },
                new[] { 0f, 0f, 10f, 20f }
            };

            foreach (var observation in observations)
            {
                sparseSet.AddInstance(observation);
            }

            var miniBatch = new MiniBatchClustering(2, 3, 10);
            miniBatch.Train(sparseSet);

            // The assignments are not inspected; the call only has to complete.
            miniBatch.Cluster(sparseSet);
        }
Esempio n. 2
0
        /// <summary>
        /// Smoke test: trains a <c>SelfOrganizingMap</c> on four dense, all-ordinal
        /// instances (two identical pairs). Nothing is asserted; the test passes as
        /// long as training completes without throwing.
        /// </summary>
        public void dense_features_simple()
        {
            // Every one of the four features is declared ordinal.
            var featureTypes = new[]
            {
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal
            };

            var denseSet = new InstanceRepresentation(featureTypes, sparse: false);

            // Two identical pairs of observations, added in their original order.
            var observations = new[]
            {
                new[] { 20f, 30f, 0f, 0f },
                new[] { 20f, 30f, 0f, 0f },
                new[] { 0f, 0f, 10f, 20f },
                new[] { 0f, 0f, 10f, 20f }
            };

            foreach (var observation in observations)
            {
                denseSet.AddInstance(observation);
            }

            var map = new SelfOrganizingMap(10, new ushort[] { 20, 20 }, 100, 3);

            // The returned assignments are not inspected; the call only has to complete.
            map.Train(denseSet);
        }
        /// <summary>
        /// Generates synthetic clustered observations, trains a <c>MiniBatchClustering</c>
        /// on them as a sparse, all-ordinal dataset, and asserts that the purity of the
        /// resulting assignment (plus <c>Epsilon</c>) exceeds 0.6.
        /// </summary>
        public void sparse_features_medium()
        {
            // Settings handed to the synthetic cluster generator.
            var largestRadius  = 100;
            var smallestRadius = 10;
            var clusterTotal   = 10;

            // Size of the generated data.
            var dimensions        = 100;
            var observationsTotal = 2000;

            // All features are ordinal.
            var featureTypes = new InputFeatureTypes[dimensions];
            for (var index = 0; index < featureTypes.Length; index++)
            {
                featureTypes[index] = InputFeatureTypes.Ordinal;
            }

            var sparseSet = new InstanceRepresentation(featureTypes, sparse: true);

            var generator = new SyntheticDataGenerator(
                largestRadius,
                smallestRadius,
                observationsTotal,
                clusterTotal,
                dimensions);
            var expectedLabels = new List<int>();

            // Each generated item carries the generating cluster (Item1) and the observation (Item2).
            foreach (var generated in generator.GenerateClusterObservations())
            {
                expectedLabels.Add(generated.Item1);
                sparseSet.AddInstance(generated.Item2);
            }

            var miniBatch = new MiniBatchClustering(clusterTotal, 100, 2000);
            miniBatch.Train(sparseSet);
            var assigned = miniBatch.Cluster(sparseSet);

            var metrics = new MetricsGenerator();
            metrics.Add(Metrics.Purity);

            // Metrics are refreshed after every single result, exactly as before.
            for (var index = 0; index < assigned.Length; index++)
            {
                metrics.AddResult(assigned[index], expectedLabels[index]);
                metrics.UpdateMetrics();
            }

            var purityMargin = metrics.GetMetric(Metrics.Purity) + Epsilon - 0.6;

            Assert.True(purityMargin > 0);
        }
Esempio n. 4
0
        /// <summary>
        /// Assigns each instance of the provided dataset to a centroid, using the index
        /// returned by <c>Instances.MinEucDistanceIndex</c> against the current centroids.
        /// This method does not train anything: the centroids must already be initialized.
        /// </summary>
        /// <param name="set">The dataset whose instances are assigned to centroids.</param>
        /// <returns>
        /// An array with one entry per instance in <paramref name="set"/>, in the same
        /// order, holding the index chosen for that instance.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the centroids have not been trained yet.
        /// </exception>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="set"/> is <c>null</c>.
        /// </exception>
        public int[] Cluster(InstanceRepresentation set)
        {
            // Checked first so the existing "not trained" failure keeps its precedence.
            if (!_isInitialized)
            {
                throw new InvalidOperationException("The centroids are not trained.");
            }

            // Fail with a descriptive exception instead of a NullReferenceException below.
            if (set == null)
            {
                throw new ArgumentNullException(nameof(set));
            }

            // Read the instance collection once instead of on every loop iteration.
            var instances = set.Instances;
            var instancesClusters = new int[instances.Count];

            for (var i = 0; i < instancesClusters.Length; i++)
            {
                instancesClusters[i] = Instances.MinEucDistanceIndex(instances[i], _centroids);
            }

            return instancesClusters;
        }
Esempio n. 5
0
        /// <summary>
        /// Trains the map weights on the given dataset and returns, for every instance,
        /// the index of the weight vector selected by <c>Instances.MinEucDistanceIndex</c>.
        /// </summary>
        /// <remarks>
        /// <c>set.Standardize()</c> is called first, so the caller's dataset is modified.
        /// Weights are drawn through <c>GaussHelper.InvPhi</c> unless the ++ initializer is
        /// enabled and there are more instances than grid nodes, in which case
        /// <c>PlusPlusInitializer.InitializeCentroids</c> provides them.
        /// </remarks>
        /// <param name="set">The dataset to train on.</param>
        /// <returns>
        /// An array with one entry per instance, in dataset order, holding the index of
        /// the matching weight vector after training.
        /// </returns>
        public int[] Train(InstanceRepresentation set)
        {
            set.Standardize();
            var instances = set.Instances.ToArray();

            FeaturesCount = set.FeauturesCount;

            // Total number of grid nodes.
            var n = _gridDimensions[0] * _gridDimensions[1];

            if (!_usePlusPlusInit || n >= instances.Length)
            {
                // Allocate only here: the ++ branch replaces the weights wholesale.
                _weights = new float[n, set.FeauturesCount];

                // Initialize weights with Gaussian samples. The counters are int on
                // purpose: n can exceed ushort.MaxValue, and a ushort counter would
                // wrap around to 0 and never terminate the loop.
                for (var i = 0; i < n; i++)
                {
                    for (var j = 0; j < FeaturesCount; j++)
                    {
                        _weights[i, j] = (float)GaussHelper.InvPhi(_random.NextDouble());
                    }
                }
            }
            else
            {
                _weights = PlusPlusInitializer.InitializeCentroids(n, instances, _random);
            }

            for (var i = 0; i < _iterationsCount; i++)
            {
                // One randomly chosen instance per iteration.
                var instance = instances[_random.Next(instances.Length)];

                // Best Matching Unit
                var bmuIndex = Instances.MinEucDistanceIndex(instance, _weights);
                BMUCoordinates = ToCoordinates(bmuIndex);
                UpdateHexagonWeights((ushort)NeighbourhoodRadius(i), LearningRate(i), BMUCoordinates, instance.GetValues());
            }

            // Final assignment of every instance against the trained weights.
            var instancesClusters = new int[instances.Length];

            for (var i = 0; i < instances.Length; i++)
            {
                instancesClusters[i] = Instances.MinEucDistanceIndex(instances[i], _weights);
            }

            return instancesClusters;
        }
Esempio n. 6
0
        /// <summary>
        /// Checks <c>Standardize()</c> on a dense set with two ordinal and two flag features.
        /// For the first instance, each ordinal value must equal (value - mean) / sqrt(variance)
        /// within <c>Epsilon</c>, and the flag values 0 and 1 must come back as -1 and 1.
        /// </summary>
        public void standardize_dense_instance()
        {
            // Expected per-feature statistics for the two ordinal columns.
            var firstMean   = new[] { -1f, 0f, 3f, 4f }.Mean();
            var firstSigma  = Math.Sqrt(new[] { -1f, 0f, 3f, 4f }.Variance());
            var secondMean  = new[] { 5f, 6f, 7f, 0f }.Mean();
            var secondSigma = Math.Sqrt(new[] { 5f, 6f, 7f, 0f }.Variance());

            var featureTypes = new[]
            {
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Flags,
                InputFeatureTypes.Flags
            };

            var denseSet = new InstanceRepresentation(featureTypes, sparse: false);

            var rows = new[]
            {
                new[] { -1f, 5f, 0f, 1f },
                new[] { 0f, 6f, 1f, 1f },
                new[] { 3f, 7f, 1f, 1f },
                new[] { 4f, 0f, 1f, 0f }
            };

            foreach (var row in rows)
            {
                denseSet.AddInstance(row);
            }

            denseSet.Standardize();

            // Only the first instance is inspected.
            var standardized = denseSet.Instances[0].GetValues();

            var firstError  = Math.Abs((-1f - firstMean) / firstSigma - standardized[0]);
            var secondError = Math.Abs((5f - secondMean) / secondSigma - standardized[1]);

            Assert.True(firstError < Epsilon);
            Assert.True(secondError < Epsilon);
            Assert.Equal(-1f, standardized[2]);
            Assert.Equal(1f, standardized[3]);
        }
Esempio n. 7
0
        /// <summary>
        /// Checks <c>Rescale()</c> on a sparse set with two ordinal and two flag features.
        /// For the first instance, each ordinal value must equal (value - min) / max within
        /// <c>Epsilon</c>, and the flag values 0 and 1 must come back unchanged.
        /// </summary>
        public void rescale_sparse_instance()
        {
            // Expected extrema of the two ordinal columns.
            var firstMin  = 0f;
            var firstMax  = 4f;
            var secondMin = 0f;
            var secondMax = 7f;

            var featureTypes = new[]
            {
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Flags,
                InputFeatureTypes.Flags
            };

            var sparseSet = new InstanceRepresentation(featureTypes, sparse: true);

            var rows = new[]
            {
                new[] { 1f, 5f, 0f, 1f },
                new[] { 0f, 6f, 1f, 1f },
                new[] { 3f, 7f, 1f, 1f },
                new[] { 4f, 0f, 1f, 0f }
            };

            foreach (var row in rows)
            {
                sparseSet.AddInstance(row);
            }

            sparseSet.Rescale();

            // Only the first instance is inspected.
            var rescaled = sparseSet.Instances[0].GetValues();

            var firstError  = Math.Abs((1f - firstMin) / firstMax - rescaled[0]);
            var secondError = Math.Abs((5f - secondMin) / secondMax - rescaled[1]);

            Assert.True(firstError < Epsilon);
            Assert.True(secondError < Epsilon);
            Assert.Equal(0, rescaled[2]);
            Assert.Equal(1f, rescaled[3]);
        }
Esempio n. 8
0
        /// <summary>
        /// Fits the centroids to the given dataset: initializes them, marks the model
        /// as initialized, then applies <c>_iterationsCount</c> mini-batch updates.
        /// </summary>
        /// <param name="set">
        /// Dataset whose instances seed the centroids and are sampled for every mini-batch.
        /// </param>
        public void Train(InstanceRepresentation set)
        {
            var trainingInstances = set.Instances.ToArray();

            // Seed the centroids: either a sample without replacement or the ++ initializer.
            if (!_usePlusPlusInit)
            {
                _centroids = Instances.ConvertToArray(trainingInstances.SampleNoReplacement(K, _random));
            }
            else
            {
                _centroids = PlusPlusInitializer.InitializeCentroids(K, trainingInstances, _random);
            }

            _isInitialized = true;
            FeaturesCount  = set.FeauturesCount;

            // Each iteration draws a fresh mini-batch (with replacement) and applies one update.
            var iteration = 0;
            while (iteration < _iterationsCount)
            {
                MiniBatchUpdate(
                    trainingInstances.SampleReplacement(_minibatchSize, _random),
                    set.IsSparseDataset);
                iteration++;
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Checks that a dense, all-ordinal <c>InstanceRepresentation</c> returns the
        /// second added instance's values unchanged and reports four features.
        /// </summary>
        public void dense_features_simple()
        {
            var first  = new[] { 20f, 30f, 0f, 0f };
            var second = new[] { 20f, 30f, 0f, 0f };
            var third  = new[] { 0f, 0f, 10f, 20f };
            var fourth = new[] { 0f, 0f, 10f, 20f };

            // Every one of the four features is declared ordinal.
            var featureTypes = new[]
            {
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal
            };

            var denseSet = new InstanceRepresentation(featureTypes, sparse: false);

            foreach (var observation in new[] { first, second, third, fourth })
            {
                denseSet.AddInstance(observation);
            }

            // The instance stored at index 1 must match the second input array.
            var storedSecond = denseSet.Instances[1].GetValues();

            Assert.Equal(second, storedSecond);
            Assert.Equal(4, denseSet.FeauturesCount);
        }