public void sparse_features_simple()
        {
            // Smoke test: mini-batch clustering is trained on, and then applied to,
            // a small sparse data set made only of ordinal features.
            // Nothing is asserted; the test passes when no call throws.
            var featureTypes = new InputFeatureTypes[4];
            for (var f = 0; f < featureTypes.Length; f++)
            {
                featureTypes[f] = InputFeatureTypes.Ordinal;
            }

            // Two pairs of identical observations; the pairs have their non-zero
            // values in different columns.
            var observations = new[]
            {
                new[] { 20f, 30f, 0f, 0f },
                new[] { 20f, 30f, 0f, 0f },
                new[] { 0f, 0f, 10f, 20f },
                new[] { 0f, 0f, 10f, 20f }
            };

            var sparseSet = new InstanceRepresentation(featureTypes, sparse: true);
            foreach (var observation in observations)
            {
                sparseSet.AddInstance(observation);
            }

            var miniBatch = new MiniBatchClustering(2, 3, 10);
            miniBatch.Train(sparseSet);

            // The assignments are computed only to exercise Cluster(); they are not inspected.
            var assignments = miniBatch.Cluster(sparseSet);
        }
Example #2
0
        public void dense_features_simple()
        {
            // Smoke test: a self-organizing map is trained on a small dense data set
            // made only of ordinal features. Nothing is asserted; the test passes
            // when no call throws.
            var featureTypes = new InputFeatureTypes[4];
            for (var f = 0; f < featureTypes.Length; f++)
            {
                featureTypes[f] = InputFeatureTypes.Ordinal;
            }

            // Two pairs of identical observations; the pairs have their non-zero
            // values in different columns.
            var observations = new[]
            {
                new[] { 20f, 30f, 0f, 0f },
                new[] { 20f, 30f, 0f, 0f },
                new[] { 0f, 0f, 10f, 20f },
                new[] { 0f, 0f, 10f, 20f }
            };

            var denseSet = new InstanceRepresentation(featureTypes, sparse: false);
            foreach (var observation in observations)
            {
                denseSet.AddInstance(observation);
            }

            var map = new SelfOrganizingMap(10, new ushort[] { 20, 20 }, 100, 3);

            // The value returned by Train() is kept only to exercise the call; it is not inspected.
            var assignments = map.Train(denseSet);
        }
        public void sparse_features_medium()
        {
            // Clusters a synthetic sparse data set with mini-batch clustering and
            // requires the purity against the generator's labels to satisfy
            // purity + Epsilon - 0.6 > 0.

            // Settings of the synthetic cluster generator
            var largestRadius  = 100;
            var smallestRadius = 10;
            var clustersTotal  = 10;

            // Size of the generated data
            var dimensions   = 100;
            var observations = 2000;

            // Every feature is ordinal.
            var featureTypes = new InputFeatureTypes[dimensions];
            for (var d = 0; d < featureTypes.Length; d++)
            {
                featureTypes[d] = InputFeatureTypes.Ordinal;
            }

            var sparseSet = new InstanceRepresentation(featureTypes, sparse: true);

            var generator = new SyntheticDataGenerator(
                largestRadius, smallestRadius, observations, clustersTotal, dimensions);
            var expectedLabels = new List<int>();

            // Each generated item carries a cluster label (Item1) and the observation
            // itself (Item2); both are recorded in generation order.
            foreach (var generated in generator.GenerateClusterObservations())
            {
                expectedLabels.Add(generated.Item1);
                sparseSet.AddInstance(generated.Item2);
            }

            var miniBatch = new MiniBatchClustering(clustersTotal, 100, 2000);
            miniBatch.Train(sparseSet);

            var assignments = miniBatch.Cluster(sparseSet);

            var metrics = new MetricsGenerator();
            metrics.Add(Metrics.Purity);

            // Metrics are refreshed after every single result, exactly one result at a time.
            for (var row = 0; row < assignments.Length; row++)
            {
                metrics.AddResult(assignments[row], expectedLabels[row]);
                metrics.UpdateMetrics();
            }

            var purity = metrics.GetMetric(Metrics.Purity);

            Assert.True(purity + Epsilon - 0.6 > 0);
        }
Example #4
0
        public void standardize_dense_instance()
        {
            // Standardizes a dense data set with two ordinal and two flag features and
            // checks the first instance: the ordinal columns must equal
            // (value - mean) / sigma within Epsilon, and the flag columns are expected
            // to read -1 and 1.
            var observations = new[]
            {
                new[] { -1f, 5f, 0f, 1f },
                new[] { 0f, 6f, 1f, 1f },
                new[] { 3f, 7f, 1f, 1f },
                new[] { 4f, 0f, 1f, 0f }
            };

            // The two ordinal columns of the observations above, used for the reference statistics.
            var firstColumn  = new[] { -1f, 0f, 3f, 4f };
            var secondColumn = new[] { 5f, 6f, 7f, 0f };

            var firstSigma  = Math.Sqrt(firstColumn.Variance());
            var secondSigma = Math.Sqrt(secondColumn.Variance());

            var firstMean  = firstColumn.Mean();
            var secondMean = secondColumn.Mean();

            var featureTypes = new[]
            {
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Flags,
                InputFeatureTypes.Flags
            };

            var denseSet = new InstanceRepresentation(featureTypes, sparse: false);
            foreach (var observation in observations)
            {
                denseSet.AddInstance(observation);
            }

            denseSet.Standardize();

            var standardized = denseSet.Instances[0].GetValues();

            Assert.True(Math.Abs((-1f - firstMean) / firstSigma - standardized[0]) < Epsilon);
            Assert.True(Math.Abs((5f - secondMean) / secondSigma - standardized[1]) < Epsilon);
            Assert.Equal(-1f, standardized[2]);
            Assert.Equal(1f, standardized[3]);
        }
Example #5
0
        public void rescale_sparse_instance()
        {
            // Rescales a sparse data set with two ordinal and two flag features and
            // checks the first instance: the ordinal columns must equal
            // (value - min) / max within Epsilon, and the flag columns must read 0 and 1.
            var observations = new[]
            {
                new[] { 1f, 5f, 0f, 1f },
                new[] { 0f, 6f, 1f, 1f },
                new[] { 3f, 7f, 1f, 1f },
                new[] { 4f, 0f, 1f, 0f }
            };

            // Extremes of the two ordinal columns of the observations above.
            var firstMin  = 0f;
            var firstMax  = 4f;
            var secondMin = 0f;
            var secondMax = 7f;

            var featureTypes = new[]
            {
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Ordinal,
                InputFeatureTypes.Flags,
                InputFeatureTypes.Flags
            };

            var sparseSet = new InstanceRepresentation(featureTypes, sparse: true);
            foreach (var observation in observations)
            {
                sparseSet.AddInstance(observation);
            }

            sparseSet.Rescale();

            var rescaled = sparseSet.Instances[0].GetValues();

            Assert.True(Math.Abs((1f - firstMin) / firstMax - rescaled[0]) < Epsilon);
            Assert.True(Math.Abs((5f - secondMin) / secondMax - rescaled[1]) < Epsilon);
            Assert.Equal(0, rescaled[2]);
            Assert.Equal(1f, rescaled[3]);
        }
Example #6
0
        public void dense_features_simple()
        {
            // Adds four instances to a dense all-ordinal representation and checks that
            // the second instance reads back unchanged and that four features are reported.
            var featureTypes = new InputFeatureTypes[4];
            for (var f = 0; f < featureTypes.Length; f++)
            {
                featureTypes[f] = InputFeatureTypes.Ordinal;
            }

            var observations = new[]
            {
                new[] { 20f, 30f, 0f, 0f },
                new[] { 20f, 30f, 0f, 0f },
                new[] { 0f, 0f, 10f, 20f },
                new[] { 0f, 0f, 10f, 20f }
            };

            var denseSet = new InstanceRepresentation(featureTypes, sparse: false);
            foreach (var observation in observations)
            {
                denseSet.AddInstance(observation);
            }

            var secondStored = denseSet.Instances[1].GetValues();

            Assert.Equal(observations[1], secondStored);
            Assert.Equal(4, denseSet.FeauturesCount);
        }
Example #7
0
        // TODO: framework for checking if the feature values are meeting requirements.
        /// <summary> Adds feature(column) to the feature representation. </summary>
        /// <remarks>
        /// The first feature added fixes <c>InstancesCount</c>; every later feature must
        /// have exactly that many values. Arguments are validated before any state is
        /// changed, so a rejected feature leaves the representation untouched.
        /// The dense paths store a private copy of <paramref name="values"/>, so the
        /// caller's array is neither reordered nor aliased by them.
        /// </remarks>
        /// <param name="values"> Value of the feature for each instance, in instance order. </param>
        /// <param name="type"> Kind of the feature; only ordinal and flag features are stored. </param>
        /// <param name="isSparse">
        /// Whether an ordinal feature is stored sparsely (non-zero entries only).
        /// It doesn't affect the flags, they are always sparse.
        /// </param>
        /// <exception cref="ArgumentNullException"> <paramref name="values"/> is null. </exception>
        /// <exception cref="ArgumentException">
        /// The number of values differs from the number of instances already present.
        /// </exception>
        public void AddFeature(
            float[] values, InputFeatureTypes type,
            bool isSparse = false)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            // Validate first: registering the type before this check would leave
            // FeatureTypes one entry ahead of Features when the feature is rejected.
            if (InstancesCount != 0 && InstancesCount != values.Length)
            {
                throw new ArgumentException("The feature length is different than instances count.");
            }

            FeatureTypes.Add(type);

            if (InstancesCount == 0)
            {
                InstancesCount = values.Length;
            }

            switch (type)
            {
            case InputFeatureTypes.Ordinal:
            {
                if (IsSortedDataset && isSparse)
                {
                    SortSparseInput(
                        values,
                        out var nonZeroSortedValues,
                        out var nonZeroSortedIndices,
                        out var negCount);

                    Features.Add(new SparseFeature(
                                     values: nonZeroSortedValues,
                                     sortedIndices: nonZeroSortedIndices,
                                     length: values.Length,
                                     scopeOffset: 0,
                                     scopeCount: nonZeroSortedIndices.Length,
                                     negCount: negCount));
                }
                else if (IsSortedDataset)
                {
                    // Sort a copy together with the identity permutation, so that the
                    // caller's array keeps its original order.
                    var sortedValues  = (float[])values.Clone();
                    var sortedIndices = new int[sortedValues.Length];
                    for (var i = 0; i < sortedIndices.Length; i++)
                    {
                        sortedIndices[i] = i;
                    }

                    Array.Sort(sortedValues, sortedIndices);
                    Features.Add(new DenseFeature(sortedValues, sortedIndices, 0, sortedValues.Length));
                }
                else if (isSparse)
                {
                    var sparseValues  = new List<float>();
                    var sparseIndices = new List<int>();
                    for (var i = 0; i < values.Length; i++)
                    {
                        // Keep every non-zero entry: ordinal values may be negative
                        // (the sorted sparse path counts them), so only zeros are skipped.
                        if (values[i] != 0f)
                        {
                            sparseValues.Add(values[i]);
                            sparseIndices.Add(i);
                        }
                    }

                    Features.Add(new SparseFeature(
                                     values: sparseValues.ToArray(),
                                     sortedIndices: sparseIndices.ToArray(), // Not sorted case: indices are in instance order.
                                     length: values.Length,
                                     scopeOffset: 0,
                                     scopeCount: sparseIndices.Count,
                                     negCount: -1)); // Not sorted case.
                }
                else
                {
                    // Store a defensive copy, so later changes to the caller's array
                    // cannot alter the feature.
                    Features.Add(new DenseFeature((float[])values.Clone()));
                }

                break;
            }

            case InputFeatureTypes.Flags:
            {
                // A flag is set for an instance when its value is positive; only the
                // indices of the set flags are stored.
                var setIndices = new List<int>();
                for (var i = 0; i < values.Length; i++)
                {
                    if (values[i] > 0f)
                    {
                        setIndices.Add(i);
                    }
                }

                Features.Add(new BinaryFeature(setIndices.ToArray(), values.Length, 0, setIndices.Count));
                break;
            }
            }
        }