Exemple #1
0
        /// <summary>
        /// Trains <paramref name="estimator"/> once per fold and scores the held-out
        /// test portion of each fold, sequentially.
        /// </summary>
        /// <param name="data">The full dataset to split into folds.</param>
        /// <param name="estimator">The pipeline to fit on each training split.</param>
        /// <param name="numFolds">Number of cross-validation folds; must be greater than 1.</param>
        /// <param name="samplingKeyColumn">Optional column used to keep grouped rows in the same fold.</param>
        /// <param name="seed">Optional seed for generating the sampling key column.</param>
        /// <returns>One result (model + scored test set) per fold.</returns>
        private protected CrossValidationResult[] CrossValidateTrain(IDataView data, IEstimator <ITransformer> estimator,
                                                                     int numFolds, string samplingKeyColumn, int?seed = null)
        {
            // Validate arguments up front.
            Environment.CheckValue(data, nameof(data));
            Environment.CheckValue(estimator, nameof(estimator));
            Environment.CheckParam(numFolds > 1, nameof(numFolds), "Must be more than 1");
            Environment.CheckValueOrNull(samplingKeyColumn);

            DataOperationsCatalog.EnsureGroupPreservationColumn(Environment, ref data, ref samplingKeyColumn, seed);

            var results = new CrossValidationResult[numFolds];
            int index = 0;

            // Sequential per-fold training.
            // REVIEW: we could have a parallel implementation here. We would need to
            // spawn off a separate host per fold in that case.
            foreach (var split in DataOperationsCatalog.CrossValidationSplit(Environment, data, numFolds, samplingKeyColumn))
            {
                var trainedModel = estimator.Fit(split.TrainSet);
                var scoredTestSet = trainedModel.Transform(split.TestSet);
                results[index] = new CrossValidationResult(trainedModel, scoredTestSet, index);
                index++;
            }

            return results;
        }
Exemple #2
0
    public CrossValidationResult evaluate()
    {
        // Invoke the native cross-validation routine and wrap the returned pointer.
        // NOTE(review): the 'true' argument is presumably SWIG's memory-ownership flag — confirm.
        var evaluated = new CrossValidationResult(modshogunPINVOKE.CrossValidation_evaluate(swigCPtr), true);

        // Propagate any exception captured on the native side during the call.
        if (modshogunPINVOKE.SWIGPendingException.Pending)
        {
            throw modshogunPINVOKE.SWIGPendingException.Retrieve();
        }

        return evaluated;
    }
Exemple #3
0
        /// <summary>
        /// Runs k-fold cross-validation of a Bernoulli Naive Bayes classifier over the
        /// prepared input/output variables and stores the outcome in <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds.</param>
        /// <param name="minOccurences">Minimum term occurrence threshold passed to feature extraction.</param>
        private void ClassifyDataByNaiveBayes(int numOfFolds = 3, int minOccurences = 1)
        {
            // Build InputVariables / OutputVariables from the raw data.
            CalcInputAndOutputVariables(minOccurences);

            // Configure the cross-validation harness around a Naive Bayes learner
            // with a zero-one loss.
            var crossValidation = CrossValidation.Create(
                k: numOfFolds,
                learner: p => new NaiveBayesLearning <BernoulliDistribution>(),
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: InputVariables,
                y: OutputVariables);

            // Run Cross-Validation and retain the typed result.
            Result = crossValidation.Learn(InputVariables, OutputVariables) as CrossValidationResult <TModel, double[], int>;
        }
Exemple #4
0
        /// <summary>
        /// Writes the CSV rows of every cross-validation result to <paramref name="file"/>,
        /// preceded by a single CSV header line. An existing file is overwritten.
        /// </summary>
        /// <param name="file">Destination path.</param>
        /// <param name="results">Results whose CSV rows are appended in order.</param>
        public static void saveResultsToFile(string file, CrossValidationResult[] results)
        {
            // BUG FIX: the stream and writer were never disposed on an exception path
            // (only Close() on success); 'using' guarantees flush + close in all cases.
            using (var stream = File.Open(file, FileMode.Create, FileAccess.Write))
            using (var sw = new StreamWriter(stream))
            {
                sw.WriteLine(CrossValidationResult.getCSVHead());

                foreach (var result in results)
                {
                    foreach (var row in result.getCSVData())
                    {
                        sw.WriteLine(row);
                    }
                }
            }
        }
        public static void Main(string[] args)
        {
            // Load a previously trained network and the normalized iris data set
            // (4 inputs, 3 outputs, comma-separated).
            MultiLayerPerceptron network = (MultiLayerPerceptron)NeuralNetwork.createFromFile("irisNet.nnet");
            DataSet irisData = DataSet.createFromFile("data_sets/iris_data_normalised.txt", 4, 3, ",");

            string[] classNames = { "Virginica", "Setosa", "Versicolor" };

            // 5-fold cross-validation with a multi-class classification evaluator.
            CrossValidation crossValidation = new CrossValidation(network, irisData, 5);
            crossValidation.addEvaluator(new ClassifierEvaluator.MultiClass(classNames));
            crossValidation.run();

            // Print the aggregated evaluation result.
            Console.WriteLine(crossValidation.Result);
        }
Exemple #6
0
        /// <summary>
        /// Train the <paramref name="estimator"/> on <paramref name="numFolds"/> folds of the data sequentially.
        /// Return each model and each scored test dataset.
        /// </summary>
        protected internal CrossValidationResult[] CrossValidateTrain(IDataView data, IEstimator <ITransformer> estimator,
                                                                      int numFolds, string samplingKeyColumn, uint?seed = null)
        {
            // Validate arguments before doing any work.
            Environment.CheckValue(data, nameof(data));
            Environment.CheckValue(estimator, nameof(estimator));
            Environment.CheckParam(numFolds > 1, nameof(numFolds), "Must be more than 1");
            Environment.CheckValueOrNull(samplingKeyColumn);

            EnsureGroupPreservationColumn(ref data, ref samplingKeyColumn, seed);

            // Trains and scores a single fold. Rows whose sampling key falls in the
            // fold's fraction of [0, 1) become the test set; the complement is the
            // training set.
            CrossValidationResult TrainFold(int fold)
            {
                double lower = (double)fold / numFolds;
                double upper = (double)(fold + 1) / numFolds;

                var trainSet = new RangeFilter(Environment, new RangeFilter.Options
                {
                    Column = samplingKeyColumn,
                    Min = lower,
                    Max = upper,
                    Complement = true
                }, data);
                var testSet = new RangeFilter(Environment, new RangeFilter.Options
                {
                    Column = samplingKeyColumn,
                    Min = lower,
                    Max = upper,
                    Complement = false
                }, data);

                var model = estimator.Fit(trainSet);
                var scoredTest = model.Transform(testSet);
                return new CrossValidationResult(model, scoredTest, fold);
            }

            // Sequential per-fold training.
            // REVIEW: we could have a parallel implementation here. We would need to
            // spawn off a separate host per fold in that case.
            var results = new CrossValidationResult[numFolds];
            for (int fold = 0; fold < numFolds; fold++)
            {
                results[fold] = TrainFold(fold);
            }

            return results;
        }
Exemple #7
0
        /// <summary>
        /// Runs k-fold cross-validation of a logistic regression classifier (trained via
        /// iteratively reweighted least squares) over the prepared input/output variables,
        /// storing the outcome in <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds.</param>
        /// <param name="minOccurences">Minimum term occurrence threshold passed to feature extraction.</param>
        /// <param name="maxIterations">Maximum IRLS iterations for each fold's learner.</param>
        private void ClassifyDataByLogisticRegression(int numOfFolds = 3, int minOccurences = 1, int maxIterations = 100)
        {
            CalcInputAndOutputVariables(minOccurences);

            var cvLogisticRegressionClassifier = CrossValidation.Create(
                k: numOfFolds,
                learner: (p) => new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                // BUG FIX: the maxIterations parameter was previously ignored in favor
                // of a hard-coded 100; honor the caller's value (default is still 100).
                MaxIterations  = maxIterations,
                Regularization = 1e-6
            },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: InputVariables,
                y: OutputVariables
                );

            // Run Cross-Validation
            Result = cvLogisticRegressionClassifier.Learn(InputVariables, OutputVariables) as CrossValidationResult <TModel, double[], int>;
        }
 // Returns the native handle backing obj; a null wrapper maps to a zero handle.
 internal static HandleRef getCPtr(CrossValidationResult obj)
 {
     if (obj == null)
     {
         return new HandleRef(null, IntPtr.Zero);
     }
     return obj.swigCPtr;
 }
Exemple #9
0
 // Translates a (possibly null) managed wrapper into its underlying native handle.
 internal static HandleRef getCPtr(CrossValidationResult obj)
 {
     return obj == null
         ? new HandleRef(null, IntPtr.Zero)
         : obj.swigCPtr;
 }
Exemple #10
0
 public CrossValidationResult evaluate()
 {
     // Run the native evaluation and wrap the resulting pointer.
     // NOTE(review): the 'true' flag is presumably SWIG's memory-ownership marker — confirm.
     var result = new CrossValidationResult(modshogunPINVOKE.CrossValidation_evaluate(swigCPtr), true);

     // Re-throw any exception recorded during the native call.
     if (modshogunPINVOKE.SWIGPendingException.Pending)
     {
         throw modshogunPINVOKE.SWIGPendingException.Retrieve();
     }

     return result;
 }