/// <summary>
/// Trains <paramref name="estimator"/> on each of <paramref name="numFolds"/> cross-validation
/// splits sequentially, scoring each fold's test set with the fitted model.
/// </summary>
/// <param name="data">The full data set to split into folds.</param>
/// <param name="estimator">The pipeline to fit once per fold.</param>
/// <param name="numFolds">Number of folds; must be greater than 1.</param>
/// <param name="samplingKeyColumn">Optional column used to keep related rows in the same fold.</param>
/// <param name="seed">Optional seed used when a sampling key column must be synthesized.</param>
/// <returns>One <see cref="CrossValidationResult"/> per fold, indexed by fold number.</returns>
private protected CrossValidationResult[] CrossValidateTrain(IDataView data, IEstimator<ITransformer> estimator,
    int numFolds, string samplingKeyColumn, int? seed = null)
{
    Environment.CheckValue(data, nameof(data));
    Environment.CheckValue(estimator, nameof(estimator));
    Environment.CheckParam(numFolds > 1, nameof(numFolds), "Must be more than 1");
    Environment.CheckValueOrNull(samplingKeyColumn);

    DataOperationsCatalog.EnsureGroupPreservationColumn(Environment, ref data, ref samplingKeyColumn, seed);

    var results = new CrossValidationResult[numFolds];
    int foldIndex = 0;

    // Sequential per-fold training.
    // REVIEW: we could have a parallel implementation here. We would need to
    // spawn off a separate host per fold in that case.
    foreach (var split in DataOperationsCatalog.CrossValidationSplit(Environment, data, numFolds, samplingKeyColumn))
    {
        var fittedModel = estimator.Fit(split.TrainSet);
        var scoredTestSet = fittedModel.Transform(split.TestSet);
        results[foldIndex] = new CrossValidationResult(fittedModel, scoredTestSet, foldIndex);
        foldIndex++;
    }
    return results;
}
/// <summary>
/// Runs the native cross-validation procedure and wraps the returned native pointer
/// in an owned <see cref="CrossValidationResult"/>. Rethrows any exception raised on
/// the native side via the SWIG pending-exception mechanism.
/// </summary>
public CrossValidationResult evaluate()
{
    var result = new CrossValidationResult(modshogunPINVOKE.CrossValidation_evaluate(swigCPtr), true);
    // Surface any error the native call recorded before returning the wrapper.
    if (modshogunPINVOKE.SWIGPendingException.Pending)
        throw modshogunPINVOKE.SWIGPendingException.Retrieve();
    return result;
}
/// <summary>
/// Runs k-fold cross-validation of a Bernoulli Naive Bayes classifier over the
/// current input/output variables and stores the outcome in <c>Result</c>.
/// </summary>
/// <param name="numOfFolds">Number of cross-validation folds.</param>
/// <param name="minOccurences">Minimum feature occurrence threshold passed to the feature extraction step.</param>
private void ClassifyDataByNaiveBayes(int numOfFolds = 3, int minOccurences = 1)
{
    // Rebuild the feature matrix / labels before validating.
    CalcInputAndOutputVariables(minOccurences);

    var naiveBayesCrossValidation = CrossValidation.Create(
        k: numOfFolds,
        x: InputVariables,
        y: OutputVariables,
        learner: p => new NaiveBayesLearning<BernoulliDistribution>(),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual)
    );

    // Run Cross-Validation
    Result = naiveBayesCrossValidation.Learn(InputVariables, OutputVariables)
        as CrossValidationResult<TModel, double[], int>;
}
/// <summary>
/// Writes the CSV header followed by every row of every fold result to <paramref name="file"/>,
/// creating or overwriting the file.
/// </summary>
/// <param name="file">Destination path; any existing content is truncated.</param>
/// <param name="results">Fold results whose CSV rows are written in order.</param>
public static void saveResultsToFile(string file, CrossValidationResult[] results)
{
    // using blocks guarantee the stream is flushed and the handle released even if
    // getCSVData() or WriteLine throws; the original leaked the FileStream on exception.
    using (var stream = File.Open(file, FileMode.Create, FileAccess.Write))
    using (var sw = new StreamWriter(stream))
    {
        sw.WriteLine(CrossValidationResult.getCSVHead());
        foreach (var result in results)
        {
            foreach (var row in result.getCSVData())
            {
                sw.WriteLine(row);
            }
        }
    }
}
public static void Main(string[] args)
{
    // create data set from csv file
    // Load the pre-trained network and the normalized iris data set from disk.
    var network = (MultiLayerPerceptron)NeuralNetwork.createFromFile("irisNet.nnet");
    var irisData = DataSet.createFromFile("data_sets/iris_data_normalised.txt", 4, 3, ",");

    var classNames = new string[] { "Virginica", "Setosa", "Versicolor" };

    // 5-fold cross-validation with a multi-class classification evaluator.
    var validation = new CrossValidation(network, irisData, 5);
    validation.addEvaluator(new ClassifierEvaluator.MultiClass(classNames));
    validation.run();

    var results = validation.Result;
    Console.WriteLine(results);
}
/// <summary>
/// Train the <paramref name="estimator"/> on <paramref name="numFolds"/> folds of the data sequentially.
/// Return each model and each scored test dataset.
/// </summary>
/// <param name="data">The data to split into folds via the sampling key column.</param>
/// <param name="estimator">The pipeline fitted once per fold.</param>
/// <param name="numFolds">Number of folds; must be greater than 1.</param>
/// <param name="samplingKeyColumn">Optional column whose value range partitions the folds.</param>
/// <param name="seed">Optional seed used when a sampling key column must be synthesized.</param>
protected internal CrossValidationResult[] CrossValidateTrain(IDataView data, IEstimator<ITransformer> estimator,
    int numFolds, string samplingKeyColumn, uint? seed = null)
{
    Environment.CheckValue(data, nameof(data));
    Environment.CheckValue(estimator, nameof(estimator));
    Environment.CheckParam(numFolds > 1, nameof(numFolds), "Must be more than 1");
    Environment.CheckValueOrNull(samplingKeyColumn);

    EnsureGroupPreservationColumn(ref data, ref samplingKeyColumn, seed);

    // Sequential per-fold training.
    // REVIEW: we could have a parallel implementation here. We would need to
    // spawn off a separate host per fold in that case.
    var results = new CrossValidationResult[numFolds];
    for (int fold = 0; fold < numFolds; fold++)
    {
        // Fold k tests on the [k/numFolds, (k+1)/numFolds) slice of the sampling
        // key range and trains on its complement.
        double lowerBound = (double)fold / numFolds;
        double upperBound = (double)(fold + 1) / numFolds;

        var trainSet = new RangeFilter(Environment, new RangeFilter.Options
        {
            Column = samplingKeyColumn,
            Min = lowerBound,
            Max = upperBound,
            Complement = true
        }, data);
        var testSet = new RangeFilter(Environment, new RangeFilter.Options
        {
            Column = samplingKeyColumn,
            Min = lowerBound,
            Max = upperBound,
            Complement = false
        }, data);

        var model = estimator.Fit(trainSet);
        results[fold] = new CrossValidationResult(model, model.Transform(testSet), fold);
    }
    return results;
}
/// <summary>
/// Runs k-fold cross-validation of an L2-regularized logistic regression classifier
/// over the current input/output variables and stores the outcome in <c>Result</c>.
/// </summary>
/// <param name="numOfFolds">Number of cross-validation folds.</param>
/// <param name="minOccurences">Minimum feature occurrence threshold passed to the feature extraction step.</param>
/// <param name="maxIterations">Maximum IRLS iterations per fold's learner.</param>
private void ClassifyDataByLogisticRegression(int numOfFolds = 3, int minOccurences = 1, int maxIterations = 100)
{
    CalcInputAndOutputVariables(minOccurences);

    var cvLogisticRegressionClassifier = CrossValidation.Create(
        k: numOfFolds,
        learner: (p) => new IterativeReweightedLeastSquares<LogisticRegression>()
        {
            // BUG FIX: the maxIterations parameter was previously ignored
            // (MaxIterations was hard-coded to 100). Default value is unchanged.
            MaxIterations = maxIterations,
            Regularization = 1e-6
        },
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: InputVariables,
        y: OutputVariables
    );

    // Run Cross-Validation
    Result = cvLogisticRegressionClassifier.Learn(InputVariables, OutputVariables)
        as CrossValidationResult<TModel, double[], int>;
}
/// <summary>
/// Returns the native handle wrapped by <paramref name="obj"/>, or a null
/// <see cref="HandleRef"/> when <paramref name="obj"/> is null.
/// </summary>
internal static HandleRef getCPtr(CrossValidationResult obj)
{
    if (obj == null)
    {
        return new HandleRef(null, IntPtr.Zero);
    }
    return obj.swigCPtr;
}
/// <summary>
/// Returns the native handle wrapped by <paramref name="obj"/>, or a null
/// <see cref="HandleRef"/> when <paramref name="obj"/> is null.
/// </summary>
internal static HandleRef getCPtr(CrossValidationResult obj)
    => obj == null ? new HandleRef(null, IntPtr.Zero) : obj.swigCPtr;
/// <summary>
/// Runs the native cross-validation procedure and wraps the returned native pointer
/// in an owned <see cref="CrossValidationResult"/>. Rethrows any exception recorded
/// by the native call via the SWIG pending-exception mechanism.
/// </summary>
public CrossValidationResult evaluate()
{
    var evaluation = new CrossValidationResult(modshogunPINVOKE.CrossValidation_evaluate(swigCPtr), true);
    if (modshogunPINVOKE.SWIGPendingException.Pending)
    {
        throw modshogunPINVOKE.SWIGPendingException.Retrieve();
    }
    return evaluation;
}