public void sparse_features_simple()
{
    var instance1 = new[] { 20f, 30f, 0f, 0f };
    var instance2 = new[] { 20f, 30f, 0f, 0f };
    var instance3 = new[] { 0f, 0f, 10f, 20f };
    var instance4 = new[] { 0f, 0f, 10f, 20f };

    var inputFeaturesTypes = new[]
    {
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal
    };

    var ordinalSparseSet = new InstanceRepresentation(inputFeaturesTypes, sparse: true);
    ordinalSparseSet.AddInstance(instance1);
    ordinalSparseSet.AddInstance(instance2);
    ordinalSparseSet.AddInstance(instance3);
    ordinalSparseSet.AddInstance(instance4);

    var clustering = new MiniBatchClustering(2, 3, 10);
    clustering.Train(ordinalSparseSet);
    var categories = clustering.Cluster(ordinalSparseSet);
}
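// A hedged follow-up sketch: with two identical pairs of instances and two
// requested clusters, each pair should share a cluster label. This helper is
// hypothetical (not in the original test) and assumes Cluster() returns one
// integer label per instance in insertion order, which the test above does
// not itself verify.
private static void AssertPairedClusters(int[] categories)
{
    Assert.Equal(categories[0], categories[1]);    // instances 1 and 2 are identical
    Assert.Equal(categories[2], categories[3]);    // instances 3 and 4 are identical
    Assert.NotEqual(categories[0], categories[2]); // the two groups should separate
}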
public void dense_features_simple()
{
    var instance1 = new[] { 20f, 30f, 0f, 0f };
    var instance2 = new[] { 20f, 30f, 0f, 0f };
    var instance3 = new[] { 0f, 0f, 10f, 20f };
    var instance4 = new[] { 0f, 0f, 10f, 20f };

    var inputFeaturesTypes = new[]
    {
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal
    };

    var ordinalDenseSet = new InstanceRepresentation(inputFeaturesTypes, sparse: false);
    ordinalDenseSet.AddInstance(instance1);
    ordinalDenseSet.AddInstance(instance2);
    ordinalDenseSet.AddInstance(instance3);
    ordinalDenseSet.AddInstance(instance4);

    var som = new SelfOrganizingMap(10, new ushort[] { 20, 20 }, 100, 3);
    var categories = som.Train(ordinalDenseSet);
}
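// A minimal, generic sketch of the self-organizing-map update step that
// Train() performs conceptually: find the best-matching unit (BMU) for an
// input vector, then pull the BMU's weights (and, in a full SOM, its grid
// neighbors') toward that input. This is illustrative only, not this
// library's implementation; the meaning of the SelfOrganizingMap constructor
// arguments above is not documented here.
private static void UpdateBmu(float[][] weights, float[] input, float learningRate)
{
    // Find the unit whose weight vector is closest to the input (squared L2).
    var bmu = 0;
    var best = float.MaxValue;
    for (var u = 0; u < weights.Length; u++)
    {
        var dist = 0f;
        for (var d = 0; d < input.Length; d++)
        {
            var diff = weights[u][d] - input[d];
            dist += diff * diff;
        }

        if (dist < best)
        {
            best = dist;
            bmu = u;
        }
    }

    // Move the BMU toward the input by a fraction of the remaining distance.
    for (var d = 0; d < input.Length; d++)
    {
        weights[bmu][d] += learningRate * (input[d] - weights[bmu][d]);
    }
}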
public void sparse_features_medium()
{
    // Cluster generator settings.
    var maxRadius = 100;
    var minRadius = 10;
    var clusterCount = 10;

    // Data size.
    var featureDim = 100;
    var obsCount = 2000;

    var inputFeaturesTypes = new InputFeatureTypes[featureDim];
    for (var i = 0; i < featureDim; i++)
    {
        inputFeaturesTypes[i] = InputFeatureTypes.Ordinal;
    }

    var sparseOrdinalSet = new InstanceRepresentation(inputFeaturesTypes, sparse: true);
    var clusterGenerator = new SyntheticDataGenerator(maxRadius, minRadius, obsCount, clusterCount, featureDim);

    var trueClusterLabels = new List<int>();
    foreach (var observation in clusterGenerator.GenerateClusterObservations())
    {
        trueClusterLabels.Add(observation.Item1);
        sparseOrdinalSet.AddInstance(observation.Item2);
    }

    var clustering = new MiniBatchClustering(clusterCount, 100, 2000);
    clustering.Train(sparseOrdinalSet);
    var categories = clustering.Cluster(sparseOrdinalSet);

    var metricsGenerator = new MetricsGenerator();
    metricsGenerator.Add(Metrics.Purity);
    for (var i = 0; i < categories.Length; i++)
    {
        metricsGenerator.AddResult(categories[i], trueClusterLabels[i]);
        metricsGenerator.UpdateMetrics();
    }

    var purity = metricsGenerator.GetMetric(Metrics.Purity);
    Assert.True(purity > 0.6 - Epsilon);
}
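// A minimal sketch (not part of the library) of the standard cluster-purity
// definition the assertion above relies on: for each predicted cluster, take
// the count of its most common true label, sum those counts, and divide by
// the total number of instances. The helper name is hypothetical; it assumes
// System.Linq and System.Collections.Generic are imported.
private static double ComputePurity(int[] predicted, IReadOnlyList<int> truth)
{
    // Map: predicted cluster -> (true label -> count).
    var counts = new Dictionary<int, Dictionary<int, int>>();
    for (var i = 0; i < predicted.Length; i++)
    {
        if (!counts.TryGetValue(predicted[i], out var perLabel))
        {
            counts[predicted[i]] = perLabel = new Dictionary<int, int>();
        }

        perLabel[truth[i]] = perLabel.TryGetValue(truth[i], out var c) ? c + 1 : 1;
    }

    // Sum the dominant true-label count within each predicted cluster.
    var dominant = 0;
    foreach (var perLabel in counts.Values)
    {
        dominant += perLabel.Values.Max();
    }

    return (double)dominant / predicted.Length;
}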
public void standardize_dense_instance()
{
    var instance1 = new[] { -1f, 5f, 0f, 1f };
    var instance2 = new[] { 0f, 6f, 1f, 1f };
    var instance3 = new[] { 3f, 7f, 1f, 1f };
    var instance4 = new[] { 4f, 0f, 1f, 0f };

    var feature1Sigma = Math.Sqrt(new[] { -1f, 0f, 3f, 4f }.Variance());
    var feature2Sigma = Math.Sqrt(new[] { 5f, 6f, 7f, 0f }.Variance());
    var feature1Mean = new[] { -1f, 0f, 3f, 4f }.Mean();
    var feature2Mean = new[] { 5f, 6f, 7f, 0f }.Mean();

    var inputFeaturesTypes = new[]
    {
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Flags,
        InputFeatureTypes.Flags
    };

    var mixedDenseSet = new InstanceRepresentation(inputFeaturesTypes, sparse: false);
    mixedDenseSet.AddInstance(instance1);
    mixedDenseSet.AddInstance(instance2);
    mixedDenseSet.AddInstance(instance3);
    mixedDenseSet.AddInstance(instance4);

    mixedDenseSet.Standardize();

    var value1 = mixedDenseSet.Instances[0].GetValues();
    Assert.True(Math.Abs((-1f - feature1Mean) / feature1Sigma - value1[0]) < Epsilon);
    Assert.True(Math.Abs((5f - feature2Mean) / feature2Sigma - value1[1]) < Epsilon);
    Assert.Equal(-1f, value1[2]);
    Assert.Equal(1f, value1[3]);
}
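// A minimal sketch of the z-score transform the test above verifies for
// ordinal columns: z = (x - mean) / sigma. The helper name is hypothetical,
// and mean/sigma are taken as parameters so the sketch stays agnostic about
// which variance estimator the .Variance() extension uses. Note the flag
// assertions above imply Standardize() maps flag values {0, 1} to {-1, 1};
// that behavior is the library's, not shown here.
private static float[] ZScore(float[] column, float mean, float sigma)
{
    var standardized = new float[column.Length];
    for (var i = 0; i < column.Length; i++)
    {
        standardized[i] = (column[i] - mean) / sigma;
    }

    return standardized;
}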
public void rescale_sparse_instance()
{
    var instance1 = new[] { 1f, 5f, 0f, 1f };
    var instance2 = new[] { 0f, 6f, 1f, 1f };
    var instance3 = new[] { 3f, 7f, 1f, 1f };
    var instance4 = new[] { 4f, 0f, 1f, 0f };

    var feature1Max = 4f;
    var feature2Max = 7f;
    var feature1Min = 0f;
    var feature2Min = 0f;

    var inputFeaturesTypes = new[]
    {
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Flags,
        InputFeatureTypes.Flags
    };

    var mixedSparseSet = new InstanceRepresentation(inputFeaturesTypes, sparse: true);
    mixedSparseSet.AddInstance(instance1);
    mixedSparseSet.AddInstance(instance2);
    mixedSparseSet.AddInstance(instance3);
    mixedSparseSet.AddInstance(instance4);

    mixedSparseSet.Rescale();

    var value1 = mixedSparseSet.Instances[0].GetValues();
    Assert.True(Math.Abs((1f - feature1Min) / feature1Max - value1[0]) < Epsilon);
    Assert.True(Math.Abs((5f - feature2Min) / feature2Max - value1[1]) < Epsilon);
    Assert.Equal(0f, value1[2]);
    Assert.Equal(1f, value1[3]);
}
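// A minimal sketch of the min-max rescaling the test above verifies:
// x' = (x - min) / (max - min). The assertions above divide by feature1Max
// and feature2Max directly, which matches this formula only because both
// feature minimums happen to be 0 in this data set. The helper name is
// hypothetical.
private static float[] MinMaxRescale(float[] column, float min, float max)
{
    var rescaled = new float[column.Length];
    for (var i = 0; i < column.Length; i++)
    {
        rescaled[i] = (column[i] - min) / (max - min);
    }

    return rescaled;
}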
public void dense_features_simple()
{
    var instance1 = new[] { 20f, 30f, 0f, 0f };
    var instance2 = new[] { 20f, 30f, 0f, 0f };
    var instance3 = new[] { 0f, 0f, 10f, 20f };
    var instance4 = new[] { 0f, 0f, 10f, 20f };

    var inputFeaturesTypes = new[]
    {
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal,
        InputFeatureTypes.Ordinal
    };

    var ordinalDenseSet = new InstanceRepresentation(inputFeaturesTypes, sparse: false);
    ordinalDenseSet.AddInstance(instance1);
    ordinalDenseSet.AddInstance(instance2);
    ordinalDenseSet.AddInstance(instance3);
    ordinalDenseSet.AddInstance(instance4);

    Assert.Equal(instance2, ordinalDenseSet.Instances[1].GetValues());
    Assert.Equal(4, ordinalDenseSet.FeauturesCount);
}
// TODO: framework for checking whether the feature values meet requirements.
/// <summary>
/// Adds a feature (column) to the feature representation.
/// </summary>
/// <param name="isSparse">
/// Has no effect on flags; they are always stored sparse.
/// </param>
public void AddFeature(float[] values, InputFeatureTypes type, bool isSparse = false)
{
    FeatureTypes.Add(type);
    var sIndices = new List<int>();

    if (InstancesCount != 0)
    {
        if (InstancesCount != values.Length)
        {
            throw new ArgumentException("The feature length differs from the instances count.");
        }
    }
    else
    {
        InstancesCount = values.Length;
    }

    switch (type)
    {
        case InputFeatureTypes.Ordinal:
            if (IsSortedDataset)
            {
                if (isSparse)
                {
                    SortSparseInput(
                        values,
                        out var nonZeroSortedValues,
                        out var nonZeroSortedIndices,
                        out var negCount);
                    Features.Add(new SparseFeature(
                        values: nonZeroSortedValues,
                        sortedIndices: nonZeroSortedIndices,
                        length: values.Length,
                        scopeOffset: 0,
                        scopeCount: nonZeroSortedIndices.Length,
                        negCount: negCount));
                    break;
                }
                else
                {
                    var sortedIndices = new int[values.Length];
                    for (var i = 0; i < values.Length; i++)
                    {
                        sortedIndices[i] = i;
                    }

                    // Note: sorts the caller's array in place.
                    Array.Sort(values, sortedIndices);
                    Features.Add(new DenseFeature(values, sortedIndices, 0, values.Length));
                    break;
                }
            }

            if (isSparse)
            {
                var sValues = new List<float>();
                for (var i = 0; i < values.Length; i++)
                {
                    if (values[i] > 0f)
                    {
                        sValues.Add(values[i]);
                        sIndices.Add(i);
                    }
                }

                Features.Add(new SparseFeature(
                    values: sValues.ToArray(),
                    sortedIndices: sIndices.ToArray(), // Not the sorted case.
                    length: values.Length,
                    scopeOffset: 0,
                    scopeCount: sIndices.Count,
                    negCount: -1)); // Not the sorted case.
                break;
            }

            // Copy the input so later mutations of the caller's array do not
            // affect the stored feature. (The copy was previously created but
            // the original array was stored by mistake.)
            var copiedValues = new float[values.Length];
            Array.Copy(values, copiedValues, values.Length);
            Features.Add(new DenseFeature(copiedValues));
            break;

        case InputFeatureTypes.Flags:
            for (var i = 0; i < values.Length; i++)
            {
                if (values[i] > 0f)
                {
                    sIndices.Add(i);
                }
            }

            Features.Add(new BinaryFeature(sIndices.ToArray(), values.Length, 0, sIndices.Count));
            break;
    }
}
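// A minimal standalone sketch (hypothetical name) of the non-zero extraction
// that the unsorted sparse ordinal path above performs: keep only strictly
// positive values together with their original row indices. Note that, as
// written, AddFeature treats zero and negative values as "absent" in this
// path, since it filters on values[i] > 0f; only the sorted sparse path
// tracks a negative count via SortSparseInput.
private static (float[] Values, int[] Indices) ExtractPositives(float[] values)
{
    var kept = new List<float>();
    var indices = new List<int>();
    for (var i = 0; i < values.Length; i++)
    {
        if (values[i] > 0f)
        {
            kept.Add(values[i]);
            indices.Add(i);
        }
    }

    return (kept.ToArray(), indices.ToArray());
}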