예제 #1
0
        /// <summary>
        /// Runs a grid search over a fixed set of "trees", "sampleRatio" and "join"
        /// values and returns the model produced by <c>GridSearch.Compute</c>.
        /// </summary>
        /// <param name="appIdentAcordSource">Source of the samples, integer labels and decision variables.</param>
        /// <param name="bestParameters">Receives the parameter combination reported by the grid search.</param>
        /// <param name="minError">Receives the error reported by the grid search for that combination.</param>
        /// <returns>The <c>RandomForest</c> returned by the grid search.</returns>
        /// <remarks>
        /// Each candidate is scored with <c>ZeroOneLoss</c> on the very same samples that are
        /// handed to <c>CreateRandomForestModel</c>; no separate hold-out set is used here.
        /// </remarks>
        public RandomForest GetBestRandomForestsWithGridSearch(AppIdentAcordSource appIdentAcordSource, out GridSearchParameterCollection bestParameters, out double minError)
        {
            // Candidate values for every searched parameter; the names are consumed
            // by CreateRandomForestModel (not visible here).
            var treeCounts   = new double[] { 1, 3, 5, 8, 11 };
            var sampleRatios = new[] { 0.7, 0.8, 0.9 };
            var joinValues   = new double[] { 25, 50, 100, 150, 200 };

            var searchSpace = new[]
            {
                new GridSearchRange("trees", treeCounts),
                new GridSearchRange("sampleRatio", sampleRatios),
                new GridSearchRange("join", joinValues)
            };

            // Read the data set once, up front, so every candidate sees the same arrays.
            var inputs    = appIdentAcordSource.Samples;
            var outputs   = appIdentAcordSource.LabelsAsIntegers;
            var variables = appIdentAcordSource.DecisionVariables;

            var search = new GridSearch<RandomForest>(searchSpace);

            // Invoked by the grid search for each parameter combination.
            // (ParallelOptions is intentionally left untouched, as in the original code.)
            search.Fitting = (GridSearchParameterCollection candidate, out double error) =>
            {
                Console.WriteLine($"{DateTime.Now} RandomForest grid search");

                // Build a forest for this combination.
                var forest = CreateRandomForestModel(variables, candidate, inputs, outputs);

                // Score it: zero-one loss of its decisions against the known labels.
                var loss      = new ZeroOneLoss(outputs);
                var predicted = forest.Decide(inputs);
                error = loss.Loss(predicted);

                return forest;
            };

            // Let the search evaluate every combination and hand back its pick.
            var bestForest = search.Compute(out bestParameters, out minError);
            return bestForest;
        }
예제 #2
0
        /// <summary>
        /// Iteratively removes the most correlated feature from the source's feature
        /// selector until the reported correlation no longer exceeds the threshold.
        /// </summary>
        /// <param name="appIdentAcordSource">
        /// Source whose <c>FeatureSelector</c> is modified in place via <c>RemoveFeature</c>.
        /// </param>
        /// <param name="trashold">
        /// Threshold: iteration continues only while the reported correlation is strictly greater than this value.
        /// </param>
        /// <returns>
        /// Every correlation matrix computed along the way, in order, including the final
        /// one whose reported correlation did not exceed the threshold.
        /// </returns>
        /// <remarks>
        /// The correlation starts at 1, so no matrix is computed (and an empty sequence is
        /// returned) when <paramref name="trashold"/> is 1 or greater.
        /// </remarks>
        public IEnumerable<double[,]> ProcessFeatureSelection(AppIdentAcordSource appIdentAcordSource, double trashold)
        {
            var matrices = new List<double[,]>();

            // Feature picked by the previous pass; null until the first pass has run.
            Type featureToDrop = null;

            for (double strongest = 1; strongest > trashold;)
            {
                // Apply the removal decided by the previous pass before re-measuring.
                if (featureToDrop != null)
                {
                    appIdentAcordSource.FeatureSelector.RemoveFeature(featureToDrop);
                }

                var current = this.GetCorrelationMatrix(appIdentAcordSource);
                matrices.Add(current);

                // Updates both the next removal candidate and the loop condition value.
                featureToDrop = this.GetMostCorellatedFeature(current, out strongest);
            }

            return matrices;
        }
예제 #3
0
 /// <summary>
 /// Returns the result of calling <c>Correlation()</c> on the source's <c>Samples2D</c>.
 /// </summary>
 /// <param name="appIdentAcordSource">Source providing the <c>Samples2D</c> data.</param>
 /// <returns>The two-dimensional array produced by <c>Correlation()</c>.</returns>
 public double[,] GetCorrelationMatrix(AppIdentAcordSource appIdentAcordSource)
 {
     var samplesMatrix = appIdentAcordSource.Samples2D;
     return samplesMatrix.Correlation();
 }
예제 #4
0
        /// <summary>
        /// Runs a 10-fold cross-validation of a random forest built with the given parameters.
        /// </summary>
        /// <param name="appIdentAcordSource">Source of the samples, integer labels and decision variables.</param>
        /// <param name="bestParameters">Parameters forwarded to <c>CreateRandomForestModel</c> for every fold.</param>
        /// <returns>The result of <c>CrossValidation.Compute()</c>.</returns>
        /// <remarks>
        /// Each fold's result carries a <c>ValidationDataSource</c> in its <c>Tag</c>, holding the
        /// validation inputs and outputs used for that fold.
        /// </remarks>
        public CrossValidationResult<RandomForest> GetCrossValidationResultsOfRandomForestModel(AppIdentAcordSource appIdentAcordSource, GridSearchParameterCollection bestParameters)
        {
            var allInputs  = appIdentAcordSource.Samples;
            var allOutputs = appIdentAcordSource.LabelsAsIntegers;

            // The validator is sized by the number of samples and splits them into 10 folds.
            var validator = new CrossValidation<RandomForest>(size: allInputs.Length, folds: 10);

            // Called once per fold with the indices (into the full data set) that
            // make up the training part and the validation part of that fold.
            validator.Fitting = (foldIndex, trainIdx, validationIdx) =>
            {
                // Slice out the training portion.
                var trainX = allInputs.Get(trainIdx);
                var trainY = allOutputs.Get(trainIdx);

                // Slice out the validation portion.
                var validX = allInputs.Get(validationIdx);
                var validY = allOutputs.Get(validationIdx);

                // Build the forest on the training portion with the supplied parameters.
                var forest = CreateRandomForestModel(appIdentAcordSource.DecisionVariables, bestParameters, trainX, trainY);

                // Zero-one loss on the data the forest was built from.
                var trainLoss     = new ZeroOneLoss(trainY);
                var trainingError = trainLoss.Loss(forest.Decide(trainX));

                // Zero-one loss on the held-out portion.
                var validLoss       = new ZeroOneLoss(validY);
                var validationError = validLoss.Loss(forest.Decide(validX));

                // Keep the validation data alongside the fold result for later inspection.
                var heldOut    = new ValidationDataSource(validX, validY);
                var foldResult = new CrossValidationValues<RandomForest>(forest, trainingError, validationError);
                foldResult.Tag = heldOut;
                return foldResult;
            };

            // Execute all folds and return the aggregated result.
            var outcome = validator.Compute();
            return outcome;
        }