Ejemplo n.º 1
0
        /// <summary>
        /// Removes from the source's feature selector every feature whose column in
        /// <c>Samples2D</c> holds the same value in all samples.
        /// </summary>
        /// <remarks>
        /// Rows of the matrix are treated as samples and columns as features. When the matrix
        /// has exactly one sample, every column is trivially constant and every feature is removed.
        /// When it has no samples at all, nothing is removed.
        /// </remarks>
        /// <param name="appIdentAcordSource">Source providing the sample matrix and the feature selector to prune.</param>
        private void RemoveFeaturesWithConstantValue(AppIdentAcordSource appIdentAcordSource)
        {
            var matrix       = appIdentAcordSource.Samples2D;
            var sampleCount  = matrix.GetLength(0);
            var featureCount = matrix.GetLength(1);

            // Without at least one row there is no reference value to compare against;
            // indexing row 0 would throw, so leave the feature set untouched.
            if (sampleCount == 0)
            {
                return;
            }

            // Look up and remove through the same selector so the two can never diverge.
            var featureSelector = appIdentAcordSource.FeatureSelector;
            var removedFeatures = 0;

            for (var featureIndex = 0; featureIndex < featureCount; featureIndex++)
            {
                var isConstant = true;
                var firstValue = matrix[0, featureIndex];
                for (var sampleIndex = 1; sampleIndex < sampleCount; sampleIndex++)
                {
                    // Any non-zero difference from the first sample means the feature varies.
                    // NOTE(review): if the values are floating-point, a NaN difference does not
                    // satisfy "> 0" and is therefore treated as "no difference" — confirm intended.
                    if (Math.Abs(firstValue - matrix[sampleIndex, featureIndex]) > 0)
                    {
                        isConstant = false;
                        break;
                    }
                }

                if (!isConstant)
                {
                    continue;
                }

                // The matrix was captured before any removal, so its column index has to be
                // shifted by the number of features already removed.
                // Assumes SelectedFeatures is ordered like the matrix columns and shrinks on
                // RemoveFeature — TODO confirm.
                var feature = featureSelector.SelectedFeatures[featureIndex - removedFeatures];
                removedFeatures++;
                featureSelector.RemoveFeature(feature);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs a grid search over random-forest parameters ("trees", "sampleRatio", "join")
        /// and returns the model the search reports as best.
        /// </summary>
        /// <param name="appIdentAcordSource">Source providing <c>Samples</c>, <c>LabelsAsIntegers</c> and <c>DecisionVariables</c>.</param>
        /// <param name="bestParameters">Receives the parameter combination reported by the grid search.</param>
        /// <param name="minError">Receives the error reported by the grid search for that combination.</param>
        /// <returns>The model returned by the grid search's <c>Compute</c> call.</returns>
        public RandomForest GetBestRandomForestsWithGridSearch(AppIdentAcordSource appIdentAcordSource, out GridSearchParameterCollection bestParameters, out double minError)
        {
            // Candidate values for each parameter. Previously tried but currently disabled:
            // trees 1/3/5/8, sampleRatio 0.7/0.9, join 25/50/100.
            var treeCandidates        = new double[] { 11, 13, 17, 19, 37 };
            var sampleRatioCandidates = new[] { 0.8 };
            var joinCandidates        = new double[] { 150, 200, 250, 300 };

            var parameterRanges = new GridSearchRange[]
            {
                new GridSearchRange("trees", treeCandidates),
                new GridSearchRange("sampleRatio", sampleRatioCandidates),
                new GridSearchRange("join", joinCandidates)
            };

            var samples           = appIdentAcordSource.Samples;
            var labels            = appIdentAcordSource.LabelsAsIntegers;
            var decisionVariables = appIdentAcordSource.DecisionVariables;

            // Grid search over RandomForest models; ParallelOptions is intentionally not set here.
            var gridSearch = new GridSearch <RandomForest>(parameterRanges);

            // Invoked by the grid search for each parameter combination it evaluates.
            gridSearch.Fitting = (GridSearchParameterCollection parameters, out double error) =>
            {
                Console.WriteLine($"{DateTime.Now} RandomForest grid search.");

                // Build a model for the current parameter combination.
                var candidateModel = CreateRandomForestModel(decisionVariables, parameters, samples, labels);

                // Score it with zero-one loss on the same samples that were handed to the model builder.
                var predictedLabels = candidateModel.Decide(samples);
                error = new ZeroOneLoss(labels).Loss(predictedLabels);

                return candidateModel;
            };

            // Run the search; best parameters and their error come back through the out arguments.
            var bestModel = gridSearch.Compute(out bestParameters, out minError);
            return bestModel;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Prunes the source's feature set: first removes constant-valued features, then repeatedly
        /// removes the feature reported as most correlated until the reported correlation no longer
        /// exceeds the threshold.
        /// </summary>
        /// <param name="appIdentAcordSource">Source whose <c>FeatureSelector</c> is modified in place.</param>
        /// <param name="trashold">
        /// Correlation threshold (the parameter name is a historical misspelling of "threshold").
        /// Because the loop starts from a correlation of 1, a value of 1 or more means no
        /// correlation matrix is computed and the result is empty.
        /// </param>
        /// <returns>The correlation matrices computed in each iteration, in order.</returns>
        public IEnumerable <double[, ]> ProcessFeatureSelection(AppIdentAcordSource appIdentAcordSource, double trashold)
        {
            // Helper methods read the selector from this instance field for the duration of the call.
            this.FeatureSelector = appIdentAcordSource.FeatureSelector;
            try
            {
                this.RemoveFeaturesWithConstantValue(appIdentAcordSource);

                var    iterationResults      = new List <double[, ]>();
                Type   mostCorrelatedFeature = null;
                double correlation           = 1;

                while (correlation > trashold)
                {
                    // The feature found in the previous iteration exceeded the threshold: drop it
                    // before recomputing. Nothing to drop on the first pass.
                    if (mostCorrelatedFeature != null)
                    {
                        appIdentAcordSource.FeatureSelector.RemoveFeature(mostCorrelatedFeature);
                    }

                    var correlationMatrix = this.GetCorrelationMatrix(appIdentAcordSource);
                    iterationResults.Add(correlationMatrix);
                    mostCorrelatedFeature = this.GetMostCorellatedFeature(correlationMatrix, out correlation);
                }

                return iterationResults;
            }
            finally
            {
                // Always clear the temporary field, even when an iteration throws, so no stale
                // selector is left on the instance.
                this.FeatureSelector = null;
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Returns the result of calling <c>Correlation()</c> on the source's <c>Samples2D</c> matrix.
 /// </summary>
 /// <param name="appIdentAcordSource">Source providing the sample matrix.</param>
 /// <returns>The matrix produced by <c>Correlation()</c>.</returns>
 private double[,] GetCorrelationMatrix(AppIdentAcordSource appIdentAcordSource)
 {
     var sampleMatrix = appIdentAcordSource.Samples2D;
     return sampleMatrix.Correlation();
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Cross-validates a random-forest model built with the given parameters and returns
        /// the cross-validation result.
        /// </summary>
        /// <param name="appIdentAcordSource">Source providing <c>Samples</c>, <c>LabelsAsIntegers</c> and <c>DecisionVariables</c>.</param>
        /// <param name="bestParameters">Parameters passed to the model builder for every fold (typically the grid-search result).</param>
        /// <param name="folds">Number of folds; defaults to 10.</param>
        /// <returns>The result of the cross-validation's <c>Compute</c> call.</returns>
        public CrossValidationResult <RandomForest> GetCrossValidationResultsOfRandomForestModel(AppIdentAcordSource appIdentAcordSource, GridSearchParameterCollection bestParameters, int folds = 10)
        {
            var samples           = appIdentAcordSource.Samples;
            var labels            = appIdentAcordSource.LabelsAsIntegers;
            var decisionVariables = appIdentAcordSource.DecisionVariables;

            // The validator is sized by the number of samples and the requested fold count.
            var crossvalidation = new CrossValidation <RandomForest>(samples.Length, folds);

            // Called once per fold with the indices (into the full data set) that make up the
            // training part and the validation part of that fold.
            crossvalidation.Fitting = (int foldIndex, int[] trainIndices, int[] validationIndices) =>
            {
                Console.WriteLine($"{DateTime.Now} RandomForest cross validation.");

                // Slice the data set into this fold's training and validation subsets.
                var trainInputs = samples.Get(trainIndices);
                var trainLabels = labels.Get(trainIndices);
                var validInputs = samples.Get(validationIndices);
                var validLabels = labels.Get(validationIndices);

                // Build the model from the training subset using the supplied parameters.
                var foldModel = CreateRandomForestModel(decisionVariables, bestParameters, trainInputs, trainLabels);

                // Zero-one loss on the training subset, then on the held-out validation subset.
                var trainPredictions = foldModel.Decide(trainInputs);
                var trainingError    = new ZeroOneLoss(trainLabels).Loss(trainPredictions);

                var validPredictions = foldModel.Decide(validInputs);
                var validationError  = new ZeroOneLoss(validLabels).Loss(validPredictions);

                // Bundle model and errors; the validation data travels along in Tag.
                var foldValues = new CrossValidationValues <RandomForest>(foldModel, trainingError, validationError);
                foldValues.Tag = new ValidationDataSource(validInputs, validLabels);
                return foldValues;
            };

            // Run all folds and hand back the aggregated result.
            var result = crossvalidation.Compute();
            return result;
        }