/// <summary>
///   Initializes a new <see cref="Model"/> that wraps an already-trained
///   regression together with the featurizer, feature space and target set
///   that were used to build it.
/// </summary>
public Model(MultinomialLogisticRegression regression, Featurizer featurizer, FeatureSpace featureSpace, HashSet<Target> targets)
{
    this.Featurizer = featurizer;
    this.FeatureSpace = featureSpace;
    this.Targets = targets;
    this.regression = regression;
}
        /// <summary>
        ///   Trains a multinomial logistic regression over the given entities and
        ///   returns a new <see cref="Model"/> wrapping the fitted regression.
        /// </summary>
        /// <param name="entities">Training entities; each yields one feature row.</param>
        /// <param name="numberOfEntities">Expected number of entities (sizes the matrices).</param>
        /// <param name="featureSpace">Feature space used to build feature vectors.</param>
        /// <param name="targets">Set of class labels; its size fixes the category count.</param>
        public Model Learn(IEnumerable<MLEntity> entities, int numberOfEntities, FeatureSpace featureSpace, HashSet<Target> targets)
        {
            var targetToInt = Model.GetTargetToInt(targets);

            var features = new double[numberOfEntities][];
            var labels = new int[numberOfEntities];

            int row = 0;
            foreach (var entity in entities)
            {
                features[row] = this.featurizer.CreateFeatureVector(entity.WebSite, featureSpace);
                labels[row] = targetToInt[entity.Label];
                row++;
            }

            Logger.Log("Features extracted");

            var regression = new MultinomialLogisticRegression(inputs: featureSpace.Size, categories: targets.Count);
            var learner = new LowerBoundNewtonRaphson(regression);

            // Iterate until the parameter change is small or the iteration cap is hit.
            double delta;
            int iteration = 0;
            do
            {
                Logger.Log("Iteration: {0}", iteration);
                delta = learner.Run(features, labels);
                iteration++;
            } while (iteration < 10 && delta > 1e-6);

            return new Model(regression, this.featurizer, featureSpace, targets);
        }
        /// <summary>
        ///   Creates a new MultinomialLogisticRegression that is a copy of the
        ///   current instance, duplicating its coefficients and standard errors.
        /// </summary>
        ///
        public object Clone()
        {
            var copy = new MultinomialLogisticRegression(Inputs, Categories);

            // Copy each per-category row of parameters and their standard errors.
            for (int i = 0; i < coefficients.Length; i++)
            {
                int columns = coefficients[i].Length;
                Array.Copy(coefficients[i], copy.coefficients[i], columns);
                Array.Copy(standardErrors[i], copy.standardErrors[i], columns);
            }

            return copy;
        }
// Example #4
        /// <summary>
        ///   The likelihood ratio test of the overall model, also called the model chi-square test.
        /// </summary>
        ///
        /// <remarks>
        ///   <para>
        ///   The Chi-square test, also called the likelihood ratio test or the log-likelihood test,
        ///   is based on the deviance of the model (-2*log-likelihood). It indicates whether there
        ///   is evidence of the need to move from a simpler (nested) model to a more complicated
        ///   one.</para>
        ///   <para>
        ///   The difference between the log-likelihood ratios for the researcher's model and a
        ///   simpler model is often called the "model chi-square".</para>
        /// </remarks>
        ///
        public ChiSquareTest ChiSquare(double[][] input, double[][] output)
        {
            // Column sums of the outputs give the observed count per category.
            double[] sums = output.Sum(0);

            // Null (intercept-only) model: log odds of each category vs. the baseline.
            int k = NumberOfOutputs - 1;
            double[] intercept = new double[k];
            for (int i = 0; i < k; i++)
            {
                intercept[i] = Math.Log(sums[i + 1] / sums[0]);
            }

            var nullModel = new MultinomialLogisticRegression(NumberOfInputs, NumberOfOutputs, intercept);

            double ratio = GetLogLikelihoodRatio(input, output, nullModel);
            int degreesOfFreedom = NumberOfInputs * (NumberOfOutputs - 1);

            return new ChiSquareTest(ratio, degreesOfFreedom);
        }
        /// <summary>
        ///   Reproduces the R Data Analysis Examples multinomial logistic regression
        ///   (http://www.ats.ucla.edu/stat/r/dae/mlogit.htm) and checks coefficients,
        ///   standard errors, log-likelihood, chi-square and Wald statistics.
        /// </summary>
        public void RegressTest2()
        {
            double[][] inputs;
            int[] outputs;

            CreateInputOutputsExample1(out inputs, out outputs);

            // Multinomial logistic regression with two inputs over three categories.
            var mlr = new MultinomialLogisticRegression(inputs: 2, categories: 3);

            // Lower-bound Newton-Raphson estimation; Run returns the maximum
            // relative change in the parameters, used as the convergence criterion.
            LowerBoundNewtonRaphson lbnr = new LowerBoundNewtonRaphson(mlr);

            double delta;
            int iteration = 0;
            do
            {
                delta = lbnr.Run(inputs, outputs);
                iteration++;
            } while (iteration < 100 && delta > 1e-6);

            Assert.AreEqual(52, iteration);

            // No coefficient may have diverged to NaN.
            for (int i = 0; i < 2; i++)
                for (int j = 0; j < 3; j++)
                    Assert.IsFalse(double.IsNaN(mlr.Coefficients[i][j]));

            // Expected values from the R mlogit example; rows are brand 2 and
            // brand 3, columns are { intercept, female, age }.
            double[][] expectedCoefficients =
            {
                new[] { -11.774655, 0.523814, 0.368206 },
                new[] { -22.721396, 0.465941, 0.685908 },
            };

            for (int i = 0; i < 2; i++)
                for (int j = 0; j < 3; j++)
                    Assert.AreEqual(expectedCoefficients[i][j], mlr.Coefficients[i][j], 1e-4);

            for (int i = 0; i < 2; i++)
                for (int j = 0; j < 3; j++)
                    Assert.IsFalse(double.IsNaN(mlr.StandardErrors[i][j]));

            /*
             // Using the standard Hessian estimation
             Assert.AreEqual(1.774612, mlr.StandardErrors[0][0], 1e-6);
             Assert.AreEqual(0.194247, mlr.StandardErrors[0][1], 1e-6);
             Assert.AreEqual(0.055003, mlr.StandardErrors[0][2], 1e-6);

             Assert.AreEqual(2.058028, mlr.StandardErrors[1][0], 1e-6);
             Assert.AreEqual(0.226090, mlr.StandardErrors[1][1], 1e-6);
             Assert.AreEqual(0.062627, mlr.StandardErrors[1][2], 1e-6);
             */

            // Standard errors under the lower-bound approximation; both category
            // rows share the same values here.
            double[] expectedErrors = { 1.047378039787443, 0.153150051082552, 0.031640507386863 };
            for (int i = 0; i < 2; i++)
                for (int j = 0; j < 3; j++)
                    Assert.AreEqual(expectedErrors[j], mlr.StandardErrors[i][j], 1e-6);

            double ll = mlr.GetLogLikelihood(inputs, outputs);

            Assert.AreEqual(-702.97, ll, 1e-2);
            Assert.IsFalse(double.IsNaN(ll));

            var chi = mlr.ChiSquare(inputs, outputs);
            Assert.AreEqual(185.85, chi.Statistic, 1e-2);
            Assert.IsFalse(double.IsNaN(chi.Statistic));

            // Collect the Wald statistic for every coefficient, checking for NaNs.
            double[][] waldStatistics = new double[2][];
            for (int i = 0; i < 2; i++)
            {
                waldStatistics[i] = new double[3];
                for (int j = 0; j < 3; j++)
                {
                    var wald = mlr.GetWaldTest(i, j);
                    Assert.IsFalse(double.IsNaN(wald.Statistic));
                    waldStatistics[i][j] = wald.Statistic;
                }
            }

            /*
            // Using standard Hessian estimation
            Assert.AreEqual(-6.6351, wald00.Statistic, 1e-4);
            Assert.AreEqual( 2.6966, wald01.Statistic, 1e-4);
            Assert.AreEqual( 6.6943, wald02.Statistic, 1e-4);

            Assert.AreEqual(-11.0404, wald10.Statistic, 1e-4);
            Assert.AreEqual( 2.0609, wald11.Statistic, 1e-4);
            Assert.AreEqual(10.9524, wald12.Statistic, 1e-4);
            */

            // Expected Wald statistics under the lower-bound approximation.
            double[][] expectedWald =
            {
                new[] { -11.241995503283842, 3.4202662152119889, 11.637150673342207 },
                new[] { -21.693553825772664, 3.0423802097069097, 21.678124991086548 },
            };

            for (int i = 0; i < 2; i++)
                for (int j = 0; j < 3; j++)
                    Assert.AreEqual(expectedWald[i][j], waldStatistics[i][j], 1e-4);
        }
        /// <summary>
        ///   Builds the regression from the R mlogit example with its published
        ///   coefficients and (Hessian-based) standard errors pre-filled.
        /// </summary>
        private static MultinomialLogisticRegression createExample1()
        {
            var mlr = new MultinomialLogisticRegression(2, 3);

            // Rows are brand 2 and brand 3; columns are { intercept, female, age }.
            double[][] coefficients =
            {
                new[] { -11.774655, 0.523814, 0.368206 },
                new[] { -22.721396, 0.465941, 0.685908 },
            };

            double[][] standardErrors =
            {
                new[] { 1.774612, 0.194247, 0.055003 },
                new[] { 2.058028, 0.226090, 0.062627 },
            };

            for (int i = 0; i < 2; i++)
            {
                for (int j = 0; j < 3; j++)
                {
                    mlr.Coefficients[i][j] = coefficients[i][j];
                    mlr.StandardErrors[i][j] = standardErrors[i][j];
                }
            }

            return mlr;
        }
        /// <summary>
        ///   Verifies the constructor sizes: with 4 inputs and 7 categories the
        ///   model holds categories-1 coefficient rows of inputs+1 values each
        ///   (one slot per input plus an intercept).
        /// </summary>
        public void MultinomialLogisticRegressionConstructorTest()
        {
            const int inputs = 4;
            const int categories = 7;

            var target = new MultinomialLogisticRegression(inputs, categories);

            Assert.AreEqual(inputs, target.Inputs);
            Assert.AreEqual(categories, target.Categories);

            Assert.AreEqual(categories - 1, target.Coefficients.Length);
            foreach (var row in target.Coefficients)
                Assert.AreEqual(inputs + 1, row.Length);

            Assert.AreEqual(categories - 1, target.StandardErrors.Length);
            foreach (var row in target.StandardErrors)
                Assert.AreEqual(inputs + 1, row.Length);
        }
 /// <summary>
 ///   Gets the Log-Likelihood Ratio between two models.
 /// </summary>
 ///
 /// <remarks>
 ///   The ratio is defined as 2*(LL - LL0), where LL is this model's
 ///   log-likelihood and LL0 is that of the other (typically nested) model.
 /// </remarks>
 ///
 /// <param name="input">A set of input data.</param>
 /// <param name="output">A set of output data.</param>
 /// <param name="regression">Another Logistic Regression model.</param>
 /// <returns>The Log-Likelihood ratio (a measure of performance
 /// between two models) calculated over the given data sets.</returns>
 ///
 public double GetLogLikelihoodRatio(double[][] input, double[][] output, MultinomialLogisticRegression regression)
 {
     double thisLikelihood = this.GetLogLikelihood(input, output);
     double otherLikelihood = regression.GetLogLikelihood(input, output);
     return 2.0 * (thisLikelihood - otherLikelihood);
 }
        /// <summary>
        ///   Creates a new MultinomialLogisticRegression that is a copy of the
        ///   current instance, including coefficients and standard errors.
        /// </summary>
        /// 
        public object Clone()
        {
            var clone = new MultinomialLogisticRegression(Inputs, Categories);

            // Duplicate each parameter row into the new instance.
            for (int row = 0; row < coefficients.Length; row++)
            {
                coefficients[row].CopyTo(clone.coefficients[row], 0);
                standardErrors[row].CopyTo(clone.standardErrors[row], 0);
            }

            return clone;
        }
 /// <summary>
 ///   Gets the Log-Likelihood Ratio between two models.
 /// </summary>
 /// 
 /// <remarks>
 ///   The Log-Likelihood ratio is defined as 2*(LL - LL0).
 /// </remarks>
 /// 
 /// <param name="input">A set of input data.</param>
 /// <param name="output">A set of output data.</param>
 /// <param name="regression">Another Logistic Regression model.</param>
 /// <returns>The Log-Likelihood ratio (a measure of performance
 /// between two models) calculated over the given data sets.</returns>
 /// 
 public double GetLogLikelihoodRatio(double[][] input, double[][] output, MultinomialLogisticRegression regression)
 {
     double difference = this.GetLogLikelihood(input, output)
         - regression.GetLogLikelihood(input, output);
     return 2.0 * difference;
 }
        /// <summary>
        ///   The likelihood ratio test of the overall model, also called the model chi-square test.
        /// </summary>
        /// 
        /// <remarks>
        ///   <para>
        ///   The Chi-square test, also called the likelihood ratio test or the log-likelihood test,
        ///   is based on the deviance of the model (-2*log-likelihood). It indicates whether there
        ///   is evidence of the need to move from a simpler (nested) model to a more complicated
        ///   one.</para>
        ///   <para>
        ///   The difference between the log-likelihood ratios for the researcher's model and a
        ///   simpler model is often called the "model chi-square".</para>
        /// </remarks>
        /// 
        public ChiSquareTest ChiSquare(double[][] input, double[][] output)
        {
            // Totals per output column: observed count of each category.
            double[] totals = output.Sum();

            // Intercept-only (null) model: log odds of each category vs. the first.
            double[] intercept = new double[Categories - 1];
            for (int c = 0; c < intercept.Length; c++)
            {
                intercept[c] = Math.Log(totals[c + 1] / totals[0]);
            }

            var nullModel = new MultinomialLogisticRegression(Inputs, Categories, intercept);

            double ratio = GetLogLikelihoodRatio(input, output, nullModel);
            int degreesOfFreedom = Inputs * (Categories - 1);

            return new ChiSquareTest(ratio, degreesOfFreedom);
        }
 /// <summary>
 ///   Fits the multinomial logistic regression via lower-bound Newton-Raphson
 ///   and extracts per-coefficient Wald statistics, p-values and standard
 ///   errors, plus the overall log-likelihood, deviance and chi-square fit.
 /// </summary>
 /// <returns>The fitted regression (also stored in the <c>mlr</c> field).</returns>
 private MultinomialLogisticRegression buildModel()
 {
     if (independent == null) formatData();

     mlr = new MultinomialLogisticRegression(nvars, ncat);
     LowerBoundNewtonRaphson lbn = new LowerBoundNewtonRaphson(mlr);

     // NOTE(review): 'iteration' and 'delta' appear to be fields and are not
     // reset here; a second call to buildModel may exit the loop early — confirm.
     do
     {
         delta = lbn.Run(independent, dependent);
         iteration++;
     } while (iteration < totit && delta > converg);

     coefficients = mlr.Coefficients;

     // One row of statistics per non-baseline category, one column per
     // coefficient (nvars inputs plus the intercept).
     standarderror = new double[ncat - 1][];
     waldstat = new double[ncat - 1][];
     waldpvalue = new double[ncat - 1][];
     for (int i = 0; i < coefficients.Length; i++)
     {
         double[] steArr = new double[nvars + 1];
         double[] waldStatArr = new double[nvars + 1];
         double[] waldPvalueArr = new double[nvars + 1];
         for (int j = 0; j < nvars + 1; j++)
         {
             Accord.Statistics.Testing.WaldTest wt = mlr.GetWaldTest(i, j);
             steArr[j] = wt.StandardError;
             waldStatArr[j] = wt.Statistic;
             waldPvalueArr[j] = wt.PValue;
         }
         waldstat[i] = waldStatArr;
         waldpvalue[i] = waldPvalueArr;
         standarderror[i] = steArr;
     }

     loglikelihood = mlr.GetLogLikelihood(independent, dependent);
     deviance = mlr.GetDeviance(independent, dependent);

     // Compute the chi-square test once; the original called ChiSquare twice,
     // refitting the intercept-only null model for each property access.
     var chiSquareTest = mlr.ChiSquare(independent, dependent);
     x2 = chiSquareTest.Statistic;
     pv = chiSquareTest.PValue;

     return mlr;
 }
        /// <summary>
        ///   Loads a serialized model: the target set and regression weights from
        ///   <c>path + SuffixModel</c> and the feature space from
        ///   <c>path + SuffixFeatures</c>.
        /// </summary>
        /// <param name="path">Base path; the model and feature suffixes are appended.</param>
        private void Load(string path)
        {
            string modelFile = path + SuffixModel;
            string featuresFile = path + SuffixFeatures;
            double[][] weights = null;

            int targetCount = 0;
            using (var reader = new StreamReader(modelFile))
            {
                // Line 1: tab-separated target names; line 2: number of weight rows.
                this.Targets = new HashSet<Target>(reader.ReadLine().Split('\t').Select(t => (Target)Enum.Parse(typeof(Target), t)));
                // Parse with the invariant culture so model files load identically
                // regardless of the machine's regional settings (e.g. ',' decimals).
                int count = int.Parse(reader.ReadLine(), System.Globalization.CultureInfo.InvariantCulture);
                targetCount = this.Targets.Count;
                weights = new double[count][];

                // Remaining lines: one tab-separated weight vector per row.
                // NOTE(review): assumes the file holds exactly 'count' rows — confirm.
                count = 0;
                while (!reader.EndOfStream)
                {
                    var line = reader.ReadLine();
                    weights[count] = line.Split('\t').Select(t => double.Parse(t, System.Globalization.CultureInfo.InvariantCulture)).ToArray();
                    ++count;
                }
            }

            int inputs = weights[0].Length - 1; // We don't consider bias an input
            this.regression = new MultinomialLogisticRegression(inputs, targetCount);
            this.regression.Coefficients = weights;
            this.FeatureSpace = WebsiteClasification.FeatureSpace.LoadFromFile(featuresFile);
            this.Featurizer = new Featurizer();
        }