/// <summary>
/// Builds a model from a regression and the objects used alongside it.
/// </summary>
/// <param name="regression">Regression kept in the <c>regression</c> field.</param>
/// <param name="featurizer">Value assigned to <c>Featurizer</c>.</param>
/// <param name="featureSpace">Value assigned to <c>FeatureSpace</c>.</param>
/// <param name="targets">Value assigned to <c>Targets</c>.</param>
public Model(
    MultinomialLogisticRegression regression,
    Featurizer featurizer,
    FeatureSpace featureSpace,
    HashSet<Target> targets)
{
    // Plain storage only; no validation or copying is performed here.
    this.regression = regression;
    this.Featurizer = featurizer;
    this.FeatureSpace = featureSpace;
    this.Targets = targets;
}
/// <summary>
/// Featurizes the given entities, fits a multinomial logistic regression on them
/// and returns the result wrapped in a new <c>Model</c>.
/// </summary>
/// <param name="entities">Entities to learn from; each one's <c>WebSite</c> is featurized and its <c>Label</c> is mapped to an integer class.</param>
/// <param name="numberOfEntities">Number of rows allocated for the feature matrix and the label array.</param>
/// <param name="featureSpace">Feature space handed to the featurizer; its <c>Size</c> is used as the regression's input count.</param>
/// <param name="targets">Target set; its <c>Count</c> is used as the number of categories.</param>
/// <returns>A new model holding the fitted regression, this instance's featurizer, and the supplied feature space and targets.</returns>
public Model Learn(IEnumerable<MLEntity> entities, int numberOfEntities, FeatureSpace featureSpace, HashSet<Target> targets)
{
    var rows = new double[numberOfEntities][];
    var classIndices = new int[numberOfEntities];
    var targetToInt = Model.GetTargetToInt(targets);

    // One feature vector and one integer label per entity, in enumeration order.
    int next = 0;
    foreach (var entity in entities)
    {
        rows[next] = this.featurizer.CreateFeatureVector(entity.WebSite, featureSpace);
        classIndices[next] = targetToInt[entity.Label];
        next++;
    }

    Logger.Log("Features extracted");

    var fitted = new MultinomialLogisticRegression(inputs: featureSpace.Size, categories: targets.Count);
    var solver = new LowerBoundNewtonRaphson(fitted);

    const int MaxIterations = 10;
    const double Tolerance = 1e-6;

    int iteration = 0;
    while (true)
    {
        Logger.Log("Iteration: {0}", iteration);
        double delta = solver.Run(rows, classIndices);
        iteration++;

        // Written as a negated '>' so that a NaN delta also ends the loop.
        if (iteration >= MaxIterations || !(delta > Tolerance))
        {
            break;
        }
    }

    return new Model(fitted, this.featurizer, featureSpace, targets);
}
/// <summary>
///   Returns a new MultinomialLogisticRegression built with the same
///   <c>Inputs</c> and <c>Categories</c>, with every coefficient and
///   standard error copied over from this instance.
/// </summary>
///
/// <returns>The copied regression.</returns>
///
public object Clone()
{
    var copy = new MultinomialLogisticRegression(Inputs, Categories);

    for (int row = 0; row < coefficients.Length; row++)
    {
        double[] fromCoefficients = coefficients[row];
        double[] intoCoefficients = copy.coefficients[row];

        for (int col = 0; col < fromCoefficients.Length; col++)
        {
            intoCoefficients[col] = fromCoefficients[col];
            copy.standardErrors[row][col] = standardErrors[row][col];
        }
    }

    return copy;
}
/// <summary>
///   Model chi-square test: the likelihood ratio test of the overall model.
/// </summary>
///
/// <remarks>
/// <para>
///   The test (also known as the likelihood ratio or log-likelihood test) is
///   built on the deviance of the model (-2*log-likelihood). It indicates whether
///   there is evidence for moving from a simpler model to a more complicated one
///   in which the simpler model is nested.</para>
/// <para>
///   Here the simpler model is a regression constructed from intercepts only,
///   each intercept being the log of a ratio of output totals. The difference
///   between the two models' log-likelihood ratios is what is often called
///   the "model chi-square".</para>
/// </remarks>
///
/// <param name="input">The input data.</param>
/// <param name="output">The output data; its totals, taken with <c>output.Sum(0)</c>, define the intercepts.</param>
/// <returns>A test built from the likelihood ratio and <c>NumberOfInputs * (NumberOfOutputs - 1)</c>.</returns>
///
public ChiSquareTest ChiSquare(double[][] input, double[][] output)
{
    double[] totals = output.Sum(0);

    // One intercept per output other than the first, which acts as the reference.
    var intercepts = new double[NumberOfOutputs - 1];
    for (int k = 0; k < intercepts.Length; k++)
    {
        double ratioToFirst = totals[k + 1] / totals[0];
        intercepts[k] = Math.Log(ratioToFirst);
    }

    var interceptOnly = new MultinomialLogisticRegression(NumberOfInputs, NumberOfOutputs, intercepts);
    double statistic = GetLogLikelihoodRatio(input, output, interceptOnly);

    return new ChiSquareTest(statistic, NumberOfInputs * (NumberOfOutputs - 1));
}
/// <summary>
/// Fits a multinomial logistic regression (2 inputs, 3 categories) with the
/// lower-bound Newton-Raphson estimator and compares the iteration count,
/// coefficients, standard errors, log-likelihood, chi-square statistic and
/// Wald statistics against fixed reference values.
/// </summary>
public void RegressTest2()
{
    double[][] inputs;
    int[] outputs;
    CreateInputOutputsExample1(out inputs, out outputs);

    // Regression for 3 categories and the algorithm that estimates it.
    var mlr = new MultinomialLogisticRegression(inputs: 2, categories: 3);
    var lbnr = new LowerBoundNewtonRaphson(mlr);

    // Run returns the maximum relative change in the model parameters,
    // which serves as the convergence criterion.
    int iteration = 0;
    double delta;
    do
    {
        delta = lbnr.Run(inputs, outputs);
        iteration++;
    }
    while (iteration < 100 && delta > 1e-6);

    Assert.AreEqual(52, iteration);

    // Same example as in R Data Analysis Examples for Multinomial Logistic
    // Regression: http://www.ats.ucla.edu/stat/r/dae/mlogit.htm
    // Columns: intercept, female, age.
    double[][] expectedCoefficients =
    {
        new[] { -11.774655, 0.523814, 0.368206 }, // brand 2
        new[] { -22.721396, 0.465941, 0.685908 }, // brand 3
    };

    // Values produced by the lower-bound approximation. For reference, the
    // standard Hessian estimation gives
    //   brand 2: 1.774612, 0.194247, 0.055003
    //   brand 3: 2.058028, 0.226090, 0.062627
    double[][] expectedStandardErrors =
    {
        new[] { 1.047378039787443, 0.153150051082552, 0.031640507386863 },
        new[] { 1.047378039787443, 0.153150051082552, 0.031640507386863 },
    };

    // Values produced by the lower-bound approximation. For reference, the
    // standard Hessian estimation gives
    //   brand 2:  -6.6351, 2.6966,  6.6943
    //   brand 3: -11.0404, 2.0609, 10.9524
    double[][] expectedWald =
    {
        new[] { -11.241995503283842, 3.4202662152119889, 11.637150673342207 },
        new[] { -21.693553825772664, 3.0423802097069097, 21.678124991086548 },
    };

    for (int i = 0; i < expectedCoefficients.Length; i++)
    {
        for (int j = 0; j < expectedCoefficients[i].Length; j++)
        {
            Assert.IsFalse(double.IsNaN(mlr.Coefficients[i][j]));
        }
    }

    for (int i = 0; i < expectedCoefficients.Length; i++)
    {
        for (int j = 0; j < expectedCoefficients[i].Length; j++)
        {
            Assert.AreEqual(expectedCoefficients[i][j], mlr.Coefficients[i][j], 1e-4);
        }
    }

    for (int i = 0; i < expectedStandardErrors.Length; i++)
    {
        for (int j = 0; j < expectedStandardErrors[i].Length; j++)
        {
            Assert.IsFalse(double.IsNaN(mlr.StandardErrors[i][j]));
        }
    }

    for (int i = 0; i < expectedStandardErrors.Length; i++)
    {
        for (int j = 0; j < expectedStandardErrors[i].Length; j++)
        {
            Assert.AreEqual(expectedStandardErrors[i][j], mlr.StandardErrors[i][j], 1e-6);
        }
    }

    double ll = mlr.GetLogLikelihood(inputs, outputs);
    Assert.AreEqual(-702.97, ll, 1e-2);
    Assert.IsFalse(double.IsNaN(ll));

    var chi = mlr.ChiSquare(inputs, outputs);
    Assert.AreEqual(185.85, chi.Statistic, 1e-2);
    Assert.IsFalse(double.IsNaN(chi.Statistic));

    // All six Wald tests are obtained up front, before any of them is checked.
    var wald = new[]
    {
        new[] { mlr.GetWaldTest(0, 0), mlr.GetWaldTest(0, 1), mlr.GetWaldTest(0, 2) },
        new[] { mlr.GetWaldTest(1, 0), mlr.GetWaldTest(1, 1), mlr.GetWaldTest(1, 2) },
    };

    for (int i = 0; i < wald.Length; i++)
    {
        for (int j = 0; j < wald[i].Length; j++)
        {
            Assert.IsFalse(double.IsNaN(wald[i][j].Statistic));
        }
    }

    for (int i = 0; i < wald.Length; i++)
    {
        for (int j = 0; j < wald[i].Length; j++)
        {
            Assert.AreEqual(expectedWald[i][j], wald[i][j].Statistic, 1e-4);
        }
    }
}
/// <summary>
/// Builds a regression with 2 inputs and 3 categories whose coefficients and
/// standard errors are set to fixed example values.
/// </summary>
/// <returns>The populated regression.</returns>
private static MultinomialLogisticRegression createExample1()
{
    // Columns: intercept, female, age.
    double[][] coefficientValues =
    {
        new[] { -11.774655, 0.523814, 0.368206 }, // brand 2
        new[] { -22.721396, 0.465941, 0.685908 }, // brand 3
    };

    double[][] standardErrorValues =
    {
        new[] { 1.774612, 0.194247, 0.055003 },
        new[] { 2.058028, 0.226090, 0.062627 },
    };

    var model = new MultinomialLogisticRegression(2, 3);

    for (int i = 0; i < coefficientValues.Length; i++)
    {
        for (int j = 0; j < coefficientValues[i].Length; j++)
        {
            model.Coefficients[i][j] = coefficientValues[i][j];
        }
    }

    for (int i = 0; i < standardErrorValues.Length; i++)
    {
        for (int j = 0; j < standardErrorValues[i].Length; j++)
        {
            model.StandardErrors[i][j] = standardErrorValues[i][j];
        }
    }

    return model;
}
/// <summary>
/// Checks that a regression built for 4 inputs and 7 categories reports those
/// counts and exposes 6 rows of 5 values for both coefficients and standard errors.
/// </summary>
public void MultinomialLogisticRegressionConstructorTest()
{
    const int inputs = 4;
    const int categories = 7;

    var target = new MultinomialLogisticRegression(inputs, categories);

    Assert.AreEqual(4, target.Inputs);
    Assert.AreEqual(7, target.Categories);

    // One row fewer than the number of categories, one column more than the number of inputs.
    Assert.AreEqual(6, target.Coefficients.Length);
    foreach (double[] row in target.Coefficients)
    {
        Assert.AreEqual(5, row.Length);
    }

    Assert.AreEqual(6, target.StandardErrors.Length);
    foreach (double[] row in target.StandardErrors)
    {
        Assert.AreEqual(5, row.Length);
    }
}
/// <summary>
///   Computes the Log-Likelihood Ratio of this model against another one.
/// </summary>
///
/// <remarks>
///   The ratio is 2*(LL - LL0), where LL is this model's log-likelihood and
///   LL0 is the log-likelihood of <paramref name="regression"/>, both taken
///   over the same data.
/// </remarks>
///
/// <param name="input">A set of input data.</param>
/// <param name="output">A set of output data.</param>
/// <param name="regression">The other regression model to compare against.</param>
/// <returns>The Log-Likelihood ratio of the two models over the given data.</returns>
///
public double GetLogLikelihoodRatio(double[][] input, double[][] output, MultinomialLogisticRegression regression)
{
    double ownLogLikelihood = this.GetLogLikelihood(input, output);
    double otherLogLikelihood = regression.GetLogLikelihood(input, output);

    return 2.0 * (ownLogLikelihood - otherLogLikelihood);
}
/// <summary>
///   Makes a copy of this MultinomialLogisticRegression: a new instance created
///   from <c>Inputs</c> and <c>Categories</c> that receives each coefficient and
///   standard error value of the current one.
/// </summary>
///
/// <returns>The newly created copy.</returns>
///
public object Clone()
{
    MultinomialLogisticRegression clone = new MultinomialLogisticRegression(Inputs, Categories);

    for (int category = 0; category < coefficients.Length; category++)
    {
        int width = coefficients[category].Length;

        for (int term = 0; term < width; term++)
        {
            clone.coefficients[category][term] = coefficients[category][term];
            clone.standardErrors[category][term] = standardErrors[category][term];
        }
    }

    return clone;
}
/// <summary>
///   Gets the Log-Likelihood Ratio between this model and a second model.
/// </summary>
///
/// <remarks>
///   Defined as 2*(LL - LL0): twice the amount by which this model's
///   log-likelihood exceeds that of <paramref name="regression"/>.
/// </remarks>
///
/// <param name="input">A set of input data.</param>
/// <param name="output">A set of output data.</param>
/// <param name="regression">Another Logistic Regression model.</param>
/// <returns>The Log-Likelihood ratio calculated over the given data sets.</returns>
///
public double GetLogLikelihoodRatio(double[][] input, double[][] output, MultinomialLogisticRegression regression)
{
    // This model is evaluated first, then the model it is compared against.
    double difference = GetLogLikelihood(input, output) - regression.GetLogLikelihood(input, output);

    return 2.0 * difference;
}
/// <summary>
///   Likelihood ratio test of the overall model (the model chi-square test).
/// </summary>
///
/// <remarks>
/// <para>
///   The Chi-square test, also called the likelihood ratio test or the
///   log-likelihood test, is based on the deviance of the model
///   (-2*log-likelihood). It shows whether there is evidence of the need to
///   move from a simpler model to a more complicated one, the simpler model
///   being nested within the complicated one.</para>
/// <para>
///   The simpler model used here is constructed from intercepts alone. The
///   difference between the log-likelihood ratios of the two models is often
///   called the "model chi-square".</para>
/// </remarks>
///
/// <param name="input">The input data.</param>
/// <param name="output">The output data; its totals, taken with <c>output.Sum()</c>, define the intercepts.</param>
/// <returns>A test built from the likelihood ratio and <c>Inputs * (Categories - 1)</c>.</returns>
///
public ChiSquareTest ChiSquare(double[][] input, double[][] output)
{
    double[] totals = output.Sum();
    double[] baseline = new double[Categories - 1];

    // Each intercept is the log of a total relative to the first total.
    for (int c = 0; c < baseline.Length; c++)
    {
        baseline[c] = Math.Log(totals[c + 1] / totals[0]);
    }

    var nullModel = new MultinomialLogisticRegression(Inputs, Categories, baseline);
    double likelihoodRatio = GetLogLikelihoodRatio(input, output, nullModel);

    return new ChiSquareTest(likelihoodRatio, Inputs * (Categories - 1));
}
/// <summary>
/// Fits the multinomial logistic regression on <c>independent</c>/<c>dependent</c>
/// and fills the result members: coefficients, standard errors, Wald statistics
/// and p-values, log-likelihood, deviance, and the chi-square statistic and p-value.
/// </summary>
/// <returns>The fitted regression, which is also stored in <c>mlr</c>.</returns>
private MultinomialLogisticRegression buildModel()
{
    // Data is prepared on demand the first time it is needed.
    if (independent == null)
    {
        formatData();
    }

    mlr = new MultinomialLogisticRegression(nvars, ncat);
    LowerBoundNewtonRaphson lbn = new LowerBoundNewtonRaphson(mlr);

    // NOTE(review): iteration and delta are not initialised in this method;
    // presumably they are members reset elsewhere - confirm before calling twice.
    do
    {
        delta = lbn.Run(independent, dependent);
        iteration++;
    }
    while (iteration < totit && delta > converg);

    coefficients = mlr.Coefficients;
    standarderror = new double[ncat - 1][];
    waldstat = new double[ncat - 1][];
    waldpvalue = new double[ncat - 1][];

    // One Wald test per coefficient: nvars + 1 values for each coefficient row.
    for (int i = 0; i < coefficients.Length; i++)
    {
        double[] steArr = new double[nvars + 1];
        double[] waldStatArr = new double[nvars + 1];
        double[] waldPvalueArr = new double[nvars + 1];

        for (int j = 0; j < nvars + 1; j++)
        {
            Accord.Statistics.Testing.WaldTest wt = mlr.GetWaldTest(i, j);
            steArr[j] = wt.StandardError;
            waldStatArr[j] = wt.Statistic;
            waldPvalueArr[j] = wt.PValue;
        }

        waldstat[i] = waldStatArr;
        waldpvalue[i] = waldPvalueArr;
        standarderror[i] = steArr;
    }

    loglikelihood = mlr.GetLogLikelihood(independent, dependent);
    deviance = mlr.GetDeviance(independent, dependent);

    // The chi-square test is computed once and both of its values are read from
    // the same result, instead of running the whole test a second time.
    var chiSquare = mlr.ChiSquare(independent, dependent);
    x2 = chiSquare.Statistic;
    pv = chiSquare.PValue;

    return mlr;
}
/// <summary>
/// Restores this instance from two files whose names are built from
/// <paramref name="path"/>: <c>path + SuffixModel</c> (targets and weights)
/// and <c>path + SuffixFeatures</c> (feature space).
/// </summary>
/// <param name="path">Common prefix of the two file names.</param>
private void Load(string path)
{
    string modelFile = path + SuffixModel;
    string featuresFile = path + SuffixFeatures;

    double[][] weights = null;
    int targetCount = 0;

    // NOTE(review): numbers are parsed with the current culture and the file
    // layout is not validated - confirm this matches how the file is written.
    using (var reader = new StreamReader(modelFile))
    {
        // First line: tab-separated names of Target enum members.
        string[] targetNames = reader.ReadLine().Split('\t');
        this.Targets = new HashSet<Target>(
            targetNames.Select(name => (Target)Enum.Parse(typeof(Target), name)));

        // Second line: number of weight rows that follow.
        int rowCount = int.Parse(reader.ReadLine());
        targetCount = this.Targets.Count;
        weights = new double[rowCount][];

        // Remaining lines: one tab-separated row of weights each.
        for (int row = 0; !reader.EndOfStream; row++)
        {
            string line = reader.ReadLine();
            weights[row] = line.Split('\t').Select(cell => double.Parse(cell)).ToArray();
        }
    }

    // The bias is not counted as an input, hence one less than the row width.
    int inputs = weights[0].Length - 1;

    this.regression = new MultinomialLogisticRegression(inputs, targetCount);
    this.regression.Coefficients = weights;
    this.FeatureSpace = WebsiteClasification.FeatureSpace.LoadFromFile(featuresFile);
    this.Featurizer = new Featurizer();
}