/*
         * Operations that affect all the samples as a whole
         */

        // Normalize the data by range
        // Each feature will have new values in the range [lower, upper]
        /// <summary>
        /// Builds a new data set in which every feature is linearly rescaled to
        /// the interval [<paramref name="lower"/>, <paramref name="upper"/>].
        /// </summary>
        /// <remarks>
        /// The minimum and maximum of each feature are taken over all samples.
        /// A feature that has the same value in every sample maps to
        /// <paramref name="lower"/>. The number of features is taken from the
        /// first sample. Each new sample receives the description of the
        /// original and is classified with the original's category.
        /// </remarks>
        /// <param name="lower">Lower bound of the target interval.</param>
        /// <param name="upper">Upper bound of the target interval.</param>
        /// <returns>
        /// A new <see cref="ClassificationData"/> holding the rescaled samples;
        /// it contains no samples when this data set has none.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="upper"/> is less than or equal to <paramref name="lower"/>.
        /// </exception>
        public ClassificationData NormalizeToRange( double lower, double upper )
        {
            // Check the boundaries of the target interval
            if (upper <= lower)
            {
                throw new ArgumentException( "upper must be greater than lower.", "upper" );
            }

            ClassificationData result = new ClassificationData( );

            // Nothing to normalize; also avoids indexing samples[0] on an empty set
            if (samples.Count == 0)
            {
                return result;
            }

            // Initialize the ranges
            int numFeatures = samples[0].FeatureVector.Count;
            double[] minFeatureValues = new double[numFeatures];
            double[] maxFeatureValues = new double[numFeatures];
            for (int i = 0; i < numFeatures; i++)
            {
                minFeatureValues[i] = double.MaxValue;
                maxFeatureValues[i] = double.MinValue;
            }

            // First calculate the range of the features
            foreach (ISample sample in samples)
            {
                for (int iFeature = 0; iFeature < numFeatures; iFeature++)
                {
                    double value = sample.FeatureVector[iFeature];

                    // Update the min
                    if (value < minFeatureValues[iFeature])
                    {
                        minFeatureValues[iFeature] = value;
                    }

                    // Update the max
                    if (value > maxFeatureValues[iFeature])
                    {
                        maxFeatureValues[iFeature] = value;
                    }
                }
            }

            // Width of each feature's observed interval. When max == min every
            // numerator (value - min) is zero, so any non-zero divisor gives the
            // same result; 1 is used explicitly because incrementing max does
            // not change it once the magnitude exceeds the precision of double.
            double[] featureRanges = new double[numFeatures];
            for (int i = 0; i < numFeatures; i++)
            {
                featureRanges[i] = (maxFeatureValues[i] == minFeatureValues[i])
                    ? 1.0
                    : maxFeatureValues[i] - minFeatureValues[i];
            }

            // Create the new samples and fill them by a linear transformation
            // unit    = (original - min) / (max - min)   goes from 0 to 1
            // feature = unit * (upper - lower) + lower   goes from lower to upper
            double targetWidth = upper - lower;
            for (int iSample = 0; iSample < samples.Count; iSample++)
            {
                SampleBasic sample = new SampleBasic( );
                sample.Description = samples[iSample].Description;
                sample.FeatureVector = new List<double>( numFeatures );
                sample.Classify( samples[iSample].Category );
                result.Samples.Add( sample );

                for (int iFeature = 0; iFeature < numFeatures; iFeature++)
                {
                    // Transform to [0,1]
                    double featureValue =
                        (samples[iSample].FeatureVector[iFeature] - minFeatureValues[iFeature]) /
                        featureRanges[iFeature];

                    // Transform to [lower,upper]; features are appended in index order
                    sample.FeatureVector.Add( featureValue * targetWidth + lower );
                }
            }

            return result;
        }
Example #2
0
        /*
         * Operations that affect all the samples as a whole
         */

        // Normalize the data by range
        // Each feature will have new values in the range [lower, upper]
        /// <summary>
        /// Returns a copy of the data set whose features are linearly mapped
        /// onto [<paramref name="lower"/>, <paramref name="upper"/>].
        /// </summary>
        /// <remarks>
        /// Per-feature minimum and maximum are computed across all samples; the
        /// feature count is read from the first sample. A feature that is
        /// constant across the samples is mapped to <paramref name="lower"/>.
        /// Each output sample carries the source sample's description and is
        /// classified with the source sample's category.
        /// </remarks>
        /// <param name="lower">Lower end of the target interval.</param>
        /// <param name="upper">Upper end of the target interval.</param>
        /// <returns>
        /// A new <see cref="ClassificationData"/> with the transformed samples,
        /// or one without samples when there is nothing to transform.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="upper"/> is less than or equal to <paramref name="lower"/>.
        /// </exception>
        public ClassificationData NormalizeToRange(double lower, double upper)
        {
            // Check the boundaries of the target interval
            if (upper <= lower)
            {
                throw new ArgumentException("upper must be greater than lower.", "upper");
            }

            ClassificationData result = new ClassificationData( );

            // An empty data set has no first sample to read the feature count from
            if (samples.Count == 0)
            {
                return(result);
            }

            int numFeatures = samples[0].FeatureVector.Count;

            // Seed the extremes so that the first observed value replaces them
            double[] minFeatureValues = new double[numFeatures];
            double[] maxFeatureValues = new double[numFeatures];
            for (int i = 0; i < numFeatures; i++)
            {
                minFeatureValues[i] = double.MaxValue;
                maxFeatureValues[i] = double.MinValue;
            }

            // First calculate the range of the features
            foreach (ISample sample in samples)
            {
                for (int iFeature = 0; iFeature < numFeatures; iFeature++)
                {
                    double current = sample.FeatureVector[iFeature];

                    if (current < minFeatureValues[iFeature])
                    {
                        minFeatureValues[iFeature] = current;
                    }

                    if (current > maxFeatureValues[iFeature])
                    {
                        maxFeatureValues[iFeature] = current;
                    }
                }
            }

            // Divisor for each feature. For a constant feature (max == min) the
            // numerator is always zero, so the divisor only has to be non-zero.
            // A fixed 1 is used instead of max + 1 because adding 1 to a very
            // large double can leave it unchanged and still divide by zero.
            double[] divisors = new double[numFeatures];
            for (int i = 0; i < numFeatures; i++)
            {
                double spread = maxFeatureValues[i] - minFeatureValues[i];
                divisors[i] = (maxFeatureValues[i] == minFeatureValues[i]) ? 1.0 : spread;
            }

            // Obtain the new feature values by a linear transformation
            // unit    = (original - min) / (max - min)  goes from 0 to 1
            // feature = unit * (upper - lower) + lower  goes from lower to upper
            double scale = upper - lower;

            for (int iSample = 0; iSample < samples.Count; iSample++)
            {
                SampleBasic sample = new SampleBasic( );
                sample.Description   = samples[iSample].Description;
                sample.FeatureVector = new List <double>(numFeatures);
                sample.Classify(samples[iSample].Category);
                result.Samples.Add(sample);

                for (int iFeature = 0; iFeature < numFeatures; iFeature++)
                {
                    double original = samples[iSample].FeatureVector[iFeature];

                    // Transform to [0,1]
                    double unit = (original - minFeatureValues[iFeature]) / divisors[iFeature];

                    // Transform to [lower,upper] and append in feature order
                    sample.FeatureVector.Add(unit * scale + lower);
                }
            }

            return(result);
        }