/// <summary>
///   Documentation example (region <c>doc_learn_1</c>): downloads the "hsbdemo" CSV,
///   codifies its string columns, learns a multinomial logistic regression analysis
///   on it and checks the summary statistics of the result.
/// </summary>
/// <remarks>
///   The dataset is fetched over HTTPS via <c>CsvReader.FromUrl</c>, so this test
///   needs network access to run.
/// </remarks>
public void learn_test()
{
    // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm

    #region doc_learn_1
    // This example downloads an example dataset from the web and learns a multinomial logistic
    // regression on it. However, please keep in mind that the Multinomial Logistic Regression
    // can also work without many of the elements that will be shown below, like the codebook,
    // DataTables, and a CsvReader.

    // Let's download an example dataset from the web to learn a multinomial logistic regression:
    CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

    // Let's read the CSV into a DataTable. As mentioned above, this step
    // can help, but is not necessarily required for learning the model:
    DataTable table = reader.ToTable();

    // We will learn a MLR regression between the following input and output fields of this table:
    string[] inputNames = new[] { "write", "ses" };
    string[] outputNames = new[] { "prog" };

    // Now let's create a codification codebook to convert the string fields in the data
    // into integer symbols. This is required because the MLR model can only learn from
    // numeric data, so strings have to be transformed first. We can force a particular
    // interpretation for those columns if needed, as shown in the initializer below:
    var codification = new Codification()
    {
        { "write", CodificationVariable.Continuous },
        { "ses", CodificationVariable.CategoricalWithBaseline, new[] { "low", "middle", "high" } },
        { "prog", CodificationVariable.Categorical, new[] { "academic", "general" } },
    };

    // Learn the codification
    codification.Learn(table);

    // Now, transform symbols into a vector representation, growing the number of inputs.
    // Note that the 'out' arguments overwrite inputNames/outputNames with the expanded names:
    double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
    double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();

    // Create a new Multinomial Logistic Regression Analysis:
    var analysis = new MultinomialLogisticRegressionAnalysis()
    {
        InputNames = inputNames,
        OutputNames = outputNames,
    };

    // Learn the regression from the input and output pairs:
    MultinomialLogisticRegression regression = analysis.Learn(x, y);

    // Let's retrieve some information about what we just learned:
    int coefficients = analysis.Coefficients.Count; // should be 9
    int numberOfInputs = analysis.NumberOfInputs;   // should be 3
    int numberOfOutputs = analysis.NumberOfOutputs; // should be 3

    inputNames = analysis.InputNames;   // should be "write", "ses: middle", "ses: high"
    outputNames = analysis.OutputNames; // should be "prog: academic", "prog: general", "prog: vocation"

    // The regression is best visualized when it is data-bound to a
    // Windows.Forms DataGridView or WPF DataGrid. You can get the
    // values for all different coefficients and discrete values:

    // DataGridBox.Show(regression.Coefficients); // uncomment this line

    // You can get the matrix of coefficients:
    double[][] coef = analysis.CoefficientValues;

    // Should be equal to:
    double[][] expectedCoef = new double[][]
    {
        new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
        new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
    };

    // And their associated standard errors:
    double[][] stdErr = analysis.StandardErrors;

    // Should be equal to:
    // NOTE(review): these values are not asserted anywhere in this method; they contain
    // negative entries and have 5 values per row (expectedCoef has 4) - verify before
    // relying on them as reference standard errors.
    double[][] expectedErr = new double[][]
    {
        new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
        new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
    };

    // We can also get statistics and hypothesis tests:
    WaldTest[][] wald = analysis.WaldTests;        // should all have p < 0.05
    ChiSquareTest chiSquare = analysis.ChiSquare;  // should be p=1.06300120956871E-08
    double logLikelihood = analysis.LogLikelihood; // should be -179.98172637136295

    // You can use the regression to predict the values:
    int[] pred = regression.Transform(x);

    // And get the accuracy of the prediction if needed:
    var cm = GeneralConfusionMatrix.Estimate(regression, x, y.ArgMax(dimension: 1));

    double acc = cm.Accuracy; // should be 0.61
    double kappa = cm.Kappa;  // should be 0.2993487536492252
    #endregion

    Assert.AreEqual(9, coefficients);
    Assert.AreEqual(3, numberOfInputs);
    Assert.AreEqual(3, numberOfOutputs);

    Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
    Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

    Assert.AreEqual(0.61, acc, 1e-10);
    Assert.AreEqual(0.2993487536492252, kappa, 1e-10);
    Assert.AreEqual(1.06300120956871E-08, chiSquare.PValue, 1e-8);
    Assert.AreEqual(-179.98172637136295, logLikelihood, 1e-8);

    // Shared checks on the learned analysis (helper defined elsewhere in this file).
    testmlr(analysis);
}
/// <summary>
///   Documentation example (region <c>doc_learn_2</c>): learns a multinomial logistic
///   regression analysis directly from the Iris dataset's jagged arrays and class
///   labels, without DataTables or a codification codebook, and checks the summary
///   statistics of the result.
/// </summary>
public void learn_test_4()
{
    #region doc_learn_2
    // This example shows how to learn a multinomial logistic regression
    // analysis in the famous Fisher's Iris dataset. It should serve to
    // demonstrate that this class does not really need to be used with
    // DataTables, Codification codebooks and other supplementary features.

    Iris iris = new Iris();

    // Load Fisher's Iris dataset:
    double[][] x = iris.Instances;
    int[] y = iris.ClassLabels;

    // Create a new Multinomial Logistic Regression Analysis:
    var analysis = new MultinomialLogisticRegressionAnalysis();

    // Note: we could have passed the class names from iris.ClassNames and
    // variable names from iris.VariableNames during MLR instantiation as:
    //
    // var analysis = new MultinomialLogisticRegressionAnalysis()
    // {
    //     InputNames = iris.VariableNames,
    //     OutputNames = iris.ClassNames
    // };

    // However, this example is also intended to demonstrate that
    // those are not required when learning a regression analysis.

    // Learn the regression from the input and output pairs:
    MultinomialLogisticRegression regression = analysis.Learn(x, y);

    // Let's retrieve some information about what we just learned:
    int coefficients = analysis.Coefficients.Count; // should be 11
    int numberOfInputs = analysis.NumberOfInputs;   // should be 4
    int numberOfOutputs = analysis.NumberOfOutputs; // should be 3

    string[] inputNames = analysis.InputNames;   // should be "Input 0", "Input 1", "Input 2", "Input 3"
    string[] outputNames = analysis.OutputNames; // should be "Class 0", "Class 1", "Class 2"

    // The regression is best visualized when it is data-bound to a
    // Windows.Forms DataGridView or WPF DataGrid. You can get the
    // values for all different coefficients and discrete values:

    // DataGridBox.Show(regression.Coefficients); // uncomment this line

    // You can get the matrix of coefficients:
    double[][] coef = analysis.CoefficientValues;

    // And their associated standard errors:
    double[][] stdErr = analysis.StandardErrors;

    // (Reference values for the two matrices above are intentionally not listed:
    // the numbers previously shown here were copied from the hsbdemo example and
    // do not describe this four-input Iris model.)

    // We can also get statistics and hypothesis tests:
    WaldTest[][] wald = analysis.WaldTests;        // should all have p < 0.05
    ChiSquareTest chiSquare = analysis.ChiSquare;  // should be p=7.8271969268290043E-54 (effectively zero)
    double logLikelihood = analysis.LogLikelihood; // should be -29.558338705646587

    // You can use the regression to predict the values:
    int[] pred = regression.Transform(x);

    // And get the accuracy of the prediction if needed:
    var cm = GeneralConfusionMatrix.Estimate(regression, x, y);

    double acc = cm.Accuracy; // should be 0.94666666666666666
    double kappa = cm.Kappa;  // should be 0.91999999999999982
    #endregion

    Assert.AreEqual(11, coefficients);
    Assert.AreEqual(4, numberOfInputs);
    Assert.AreEqual(3, numberOfOutputs);

    Assert.AreEqual(new[] { "Input 0", "Input 1", "Input 2", "Input 3" }, inputNames);
    Assert.AreEqual(new[] { "Class 0", "Class 1", "Class 2" }, outputNames);

    Assert.AreEqual(0.94666666666666666, acc, 1e-10);
    Assert.AreEqual(0.91999999999999982, kappa, 1e-10);
    Assert.AreEqual(7.8271969268290043E-54, chiSquare.PValue, 1e-8);
    Assert.AreEqual(-29.558338705646587, logLikelihood, 1e-8);
}
/// <summary>
///   Transforms a single input vector into its associated integer output
///   by forwarding the call to the <c>regression</c> member.
/// </summary>
/// <param name="input">The input vector to be transformed.</param>
/// <returns>
///   The value returned by <c>regression.Transform</c> for the given input.
/// </returns>
public override int Transform(double[] input)
{
    // All of the work is delegated; this override only adapts the call.
    int output = regression.Transform(input);
    return output;
}