/// <summary>
/// Runs a grid search over the "trees", "sampleRatio" and "join" parameter values and
/// returns the model selected by the search.
/// </summary>
/// <remarks>
/// Every candidate model is built by <c>CreateRandomForestModel</c> and scored with
/// <c>ZeroOneLoss</c> on the same samples and labels that were handed to
/// <c>CreateRandomForestModel</c>; no separate hold-out set is used here.
/// A timestamped line is written to the console for every evaluated candidate.
/// </remarks>
/// <param name="appIdentAcordSource">Source providing <c>Samples</c>, <c>LabelsAsIntegers</c> and <c>DecisionVariables</c>.</param>
/// <param name="bestParameters">Receives the parameter combination reported by the grid search.</param>
/// <param name="minError">Receives the error value reported by the grid search.</param>
/// <returns>The model returned by <c>GridSearch.Compute</c>.</returns>
public RandomForest GetBestRandomForestsWithGridSearch(AppIdentAcordSource appIdentAcordSource, out GridSearchParameterCollection bestParameters, out double minError)
{
    // Candidate values for each tuned parameter.
    var treeCounts = new double[] { 1, 3, 5, 8, 11 };
    var sampleRatios = new double[] { 0.7, 0.8, 0.9 };
    var joinValues = new double[] { 25, 50, 100, 150, 200 };

    var searchSpace = new GridSearchRange[]
    {
        new GridSearchRange("trees", treeCounts),
        new GridSearchRange("sampleRatio", sampleRatios),
        new GridSearchRange("join", joinValues)
    };

    var inputs = appIdentAcordSource.Samples;
    var outputs = appIdentAcordSource.LabelsAsIntegers;
    var variables = appIdentAcordSource.DecisionVariables;

    var search = new GridSearch<RandomForest>(searchSpace);

    // Invoked by the grid search for each parameter combination.
    search.Fitting = (GridSearchParameterCollection candidate, out double error) =>
    {
        Console.WriteLine($"{DateTime.Now} RandomForest grid search");

        // Build a model for this parameter combination.
        var forest = CreateRandomForestModel(variables, candidate, inputs, outputs);

        // Score the model; the value is handed back through the out parameter.
        var loss = new ZeroOneLoss(outputs);
        error = loss.Loss(forest.Decide(inputs));

        return forest;
    };

    return search.Compute(out bestParameters, out minError);
}
/// <summary>
/// Repeatedly computes the correlation matrix of the source and removes the feature
/// reported by <c>GetMostCorellatedFeature</c>, until the reported correlation is no
/// longer greater than <paramref name="trashold"/>.
/// </summary>
/// <remarks>
/// The running correlation starts at 1, so when <paramref name="trashold"/> is 1 or
/// greater the loop body never executes and an empty sequence is returned.
/// Features are removed from <c>appIdentAcordSource.FeatureSelector</c>, i.e. the
/// passed-in source is modified.
/// </remarks>
/// <param name="appIdentAcordSource">Source whose features are evaluated and pruned.</param>
/// <param name="trashold">Correlation threshold (parameter name kept as spelled for caller compatibility).</param>
/// <returns>The correlation matrices computed in each iteration, in order.</returns>
public IEnumerable<double[,]> ProcessFeatureSelection(AppIdentAcordSource appIdentAcordSource, double trashold)
{
    var matrices = new List<double[,]>();
    Type featureToDrop = null;

    for (double strongest = 1; strongest > trashold;)
    {
        // Nothing to remove on the first pass; afterwards drop the feature found last time.
        if (featureToDrop != null)
        {
            appIdentAcordSource.FeatureSelector.RemoveFeature(featureToDrop);
        }

        var matrix = GetCorrelationMatrix(appIdentAcordSource);
        matrices.Add(matrix);

        featureToDrop = GetMostCorellatedFeature(matrix, out strongest);
    }

    return matrices;
}
/// <summary>
/// Returns the result of calling <c>Correlation()</c> on the source's <c>Samples2D</c>.
/// </summary>
/// <param name="appIdentAcordSource">Source providing <c>Samples2D</c>.</param>
/// <returns>The two-dimensional array produced by <c>Correlation()</c>.</returns>
public double[,] GetCorrelationMatrix(AppIdentAcordSource appIdentAcordSource)
{
    var samples2D = appIdentAcordSource.Samples2D;
    return samples2D.Correlation();
}
/// <summary>
/// Runs a 10-fold cross-validation of a random forest built with the supplied parameters.
/// </summary>
/// <remarks>
/// For every fold a model is created by <c>CreateRandomForestModel</c> from the training
/// subset, and its <c>ZeroOneLoss</c> is computed on both the training and the validation
/// subset. The validation subset is attached to each fold's result as a
/// <c>ValidationDataSource</c> in <c>Tag</c>.
/// </remarks>
/// <param name="appIdentAcordSource">Source providing <c>Samples</c>, <c>LabelsAsIntegers</c> and <c>DecisionVariables</c>.</param>
/// <param name="bestParameters">Parameters passed to <c>CreateRandomForestModel</c> for every fold.</param>
/// <returns>The result returned by <c>CrossValidation.Compute</c>.</returns>
public CrossValidationResult<RandomForest> GetCrossValidationResultsOfRandomForestModel(AppIdentAcordSource appIdentAcordSource, GridSearchParameterCollection bestParameters)
{
    var allInputs = appIdentAcordSource.Samples;
    var allOutputs = appIdentAcordSource.LabelsAsIntegers;

    var validator = new CrossValidation<RandomForest>(size: allInputs.Length, folds: 10);

    // Called once per fold with the indices of the training and validation partitions.
    validator.Fitting = (int k, int[] indicesTrain, int[] indicesValidation) =>
    {
        // Training partition.
        var trainInputs = allInputs.Get(indicesTrain);
        var trainOutputs = allOutputs.Get(indicesTrain);

        // Validation partition.
        var testInputs = allInputs.Get(indicesValidation);
        var testOutputs = allOutputs.Get(indicesValidation);

        // Fit a model on the training partition using the supplied parameters.
        var forest = CreateRandomForestModel(appIdentAcordSource.DecisionVariables, bestParameters, trainInputs, trainOutputs);

        // Error on the data the model was fitted to.
        var trainLoss = new ZeroOneLoss(trainOutputs);
        var trainingError = trainLoss.Loss(forest.Decide(trainInputs));

        // Error on the held-out partition.
        var testLoss = new ZeroOneLoss(testOutputs);
        var validationError = testLoss.Loss(forest.Decide(testInputs));

        // Bundle the model, both errors and the validation data for this fold.
        var foldResult = new CrossValidationValues<RandomForest>(forest, trainingError, validationError);
        foldResult.Tag = new ValidationDataSource(testInputs, testOutputs);
        return foldResult;
    };

    return validator.Compute();
}