示例#1
0
        /// <summary>
        /// Trains a Logistic Regression model from a set of labelled examples.
        /// </summary>
        /// <param name="X">The Matrix of examples to learn from.</param>
        /// <param name="y">The Vector of labels; entries equal to 1 are kept as the positive class, everything else is mapped to 0.</param>
        /// <returns>A <see cref="LogisticRegressionModel"/> holding the optimised Theta.</returns>
        public override IModel Generate(Matrix X, Vector y)
        {
            // expand the inputs according to the PolynomialFeatures setting, then preprocess them
            Matrix features = IncreaseDimensions(X, this.PolynomialFeatures);
            this.Preprocess(features);

            // coerce the labels into a strict 1/0 vector
            Vector labels = y.ToBinary(label => label == 1d, falseValue: 0d);

            // intercept term: a column of ones inserted at index 0
            Matrix design = features.Insert(Vector.Ones(features.Rows), 0, VectorType.Col, false);

            // random starting parameters, one per column of the design matrix
            Vector initialTheta = Vector.Rand(design.Cols);

            // gradient descent over an L2-regularised logistic cost
            var optimizer = new numl.Math.Optimization.Optimizer(initialTheta, this.MaxIterations, this.LearningRate);
            optimizer.CostFunction = new numl.Math.Functions.Cost.LogisticCostFunction()
            {
                X = design,
                Y = labels,
                Lambda = this.Lambda,
                Regularizer = new numl.Math.Functions.Regularization.L2Regularizer(),
                LogisticFunction = this.LogisticFunction
            };

            optimizer.Run();

            // package the learned parameters together with the preprocessing settings
            return new LogisticRegressionModel()
            {
                Descriptor = this.Descriptor,
                NormalizeFeatures = base.NormalizeFeatures,
                FeatureNormalizer = base.FeatureNormalizer,
                FeatureProperties = base.FeatureProperties,
                Theta = optimizer.Properties.Theta,
                LogisticFunction = this.LogisticFunction,
                PolynomialFeatures = this.PolynomialFeatures
            };
        }
        /// <summary>
        /// Trains a Linear Regression model from a set of examples.
        /// </summary>
        /// <param name="X">The Matrix of examples to learn from.</param>
        /// <param name="y">The Vector of target values.</param>
        /// <returns>A <see cref="LinearRegressionModel"/> holding the optimised Theta.</returns>
        public override IModel Generate(Matrix X, Vector y)
        {
            // preprocessing is applied to the incoming matrix itself, before it is copied
            this.Preprocess(X);

            // work on a copy from here on
            Matrix working = X.Copy();

            // intercept term: a column of ones inserted at index 0
            Matrix design = working.Insert(Vector.Ones(working.Rows), 0, VectorType.Col);

            // random starting parameters, one per column of the design matrix
            Vector initialTheta = Vector.Rand(design.Cols);

            // gradient descent over an L2-regularised linear cost
            var optimizer = new numl.Math.Optimization.Optimizer(initialTheta, this.MaxIterations, this.LearningRate);
            optimizer.CostFunction = new numl.Math.Functions.Cost.LinearCostFunction()
            {
                X = design,
                Y = y,
                Lambda = this.Lambda,
                Regularizer = new numl.Math.Functions.Regularization.L2Regularizer()
            };

            optimizer.Run();

            // the optimiser has finished: hand its theta to a new model
            return new LinearRegressionModel()
            {
                Descriptor = this.Descriptor,
                NormalizeFeatures = base.NormalizeFeatures,
                FeatureNormalizer = base.FeatureNormalizer,
                FeatureProperties = base.FeatureProperties,
                Theta = optimizer.Properties.Theta
            };
        }
        /// <summary>
        /// Trains a new Collaborative Filtering model.
        /// </summary>
        /// <param name="X">Training matrix values (entities x ratings).</param>
        /// <param name="y">Vector of entity identifiers.</param>
        /// <returns>A <see cref="CofiRecommenderModel"/> carrying the learned ThetaX / ThetaY matrices, the per-row means and the index maps.</returns>
        public override IModel Generate(Matrix X, Vector y)
        {
            // first preprocessing pass runs on a throwaway copy
            // NOTE(review): presumably this only serves to initialise FeatureProperties before it is written to below - confirm
            this.Preprocess(X.Copy());

            // X holds the ratings supplied per entity, y the entity ids.
            // Fall back to the observed min / max when no rating range was configured.
            if (this.Ratings == null)
            {
                this.Ratings = new Range(X.Min(), X.Max());
            }

            // indicator matrix: marks the cells whose value passes the rating range test
            Matrix ratedMask = X.ToBinary(value => this.Ratings.Test(value));

            // per-row mean computed over the in-range values only
            Vector rowMeans = X.GetRows()
                               .Select(row => row.Where(value => this.Ratings.Test(value)).Sum() /
                                              row.Where(value => this.Ratings.Test(value)).Count())
                               .ToVector();

            // store the averages before the real preprocessing pass
            this.FeatureProperties.Average = rowMeans;

            this.Preprocess(X);

            // columns are the references (e.g. user ratings), rows the entities (movies / books, etc.)
            int references = X.Cols;
            int entities = X.Rows;

            // random starting parameters for both factor matrices
            Matrix entityTheta = Matrix.Rand(entities, this.CollaborativeFeatures, -1d);
            Matrix referenceTheta = Matrix.Rand(references, this.CollaborativeFeatures, -1d);

            numl.Math.Functions.Cost.ICostFunction costFunction = new numl.Math.Functions.Cost.CofiCostFunction()
            {
                CollaborativeFeatures = this.CollaborativeFeatures,
                Lambda = this.Lambda,
                R = ratedMask,
                Regularizer = null,
                X = entityTheta,
                Y = X.Unshape()
            };

            // both parameter sets are optimised together, so flatten and join them
            Vector combinedTheta = Vector.Combine(entityTheta.Unshape(), referenceTheta.Unshape());

            var optimizer = new numl.Math.Optimization.Optimizer(combinedTheta, this.MaxIterations, this.LearningRate);
            optimizer.CostFunction = costFunction;

            optimizer.Run();

            // split the optimised vector back into its two matrices
            Vector optimisedTheta = optimizer.Properties.Theta;
            entityTheta = optimisedTheta.Slice(0, (entityTheta.Rows * entityTheta.Cols) - 1)
                                        .Reshape(entities, VectorType.Row);

            // offset is taken from the freshly reshaped entity matrix
            int referenceStart = entityTheta.Rows * entityTheta.Cols;
            referenceTheta = optimisedTheta.Slice(referenceStart, combinedTheta.Length - 1)
                                           .Reshape(references, VectorType.Row);

            // default index maps (each value is its own original index) unless maps already exist
            var referenceMap = this.ReferenceFeatureMap;
            if (referenceMap == null)
            {
                referenceMap = Vector.Create(references, i => i);
            }
            this.ReferenceFeatureMap = referenceMap;

            var entityMap = this.EntityFeatureMap;
            if (entityMap == null)
            {
                entityMap = Vector.Create(entities, i => i);
            }
            this.EntityFeatureMap = entityMap;

            return new CofiRecommenderModel()
            {
                Descriptor = this.Descriptor,
                NormalizeFeatures = this.NormalizeFeatures,
                FeatureNormalizer = this.FeatureNormalizer,
                FeatureProperties = this.FeatureProperties,
                Ratings = this.Ratings,
                ReferenceFeatureMap = this.ReferenceFeatureMap,
                EntityFeatureMap = this.EntityFeatureMap,
                Mu = rowMeans,
                Y = y,
                Reference = X,
                ThetaX = entityTheta,
                ThetaY = referenceTheta
            };
        }
        /// <summary>
        /// Builds a Logistic Regression model by fitting Theta to the given examples.
        /// </summary>
        /// <param name="X">The Matrix of examples to fit.</param>
        /// <param name="y">The Vector of labels; values equal to 1 stay positive, any other value is mapped to 0.</param>
        /// <returns>The fitted <see cref="LogisticRegressionModel"/>.</returns>
        public override IModel Generate(Matrix X, Vector y)
        {
            // polynomial expansion (driven by PolynomialFeatures) followed by preprocessing
            Matrix expanded = IncreaseDimensions(X, this.PolynomialFeatures);
            this.Preprocess(expanded);

            // make sure the label vector is 1/0 based
            Vector binaryLabels = y.ToBinary(v => v == 1d, falseValue: 0d);

            // prepend the intercept column of ones
            Matrix withIntercept = expanded.Insert(Vector.Ones(expanded.Rows), 0, VectorType.Col, false);

            // random initial theta, sized to the column count
            Vector startTheta = Vector.Rand(withIntercept.Cols);

            // configure and run gradient descent with an L2-regularised logistic cost
            var descent = new numl.Math.Optimization.Optimizer(startTheta, this.MaxIterations, this.LearningRate);
            descent.CostFunction = new numl.Math.Functions.Cost.LogisticCostFunction()
            {
                X = withIntercept,
                Y = binaryLabels,
                Lambda = this.Lambda,
                Regularizer = new numl.Math.Functions.Regularization.L2Regularizer(),
                LogisticFunction = this.LogisticFunction
            };

            descent.Run();

            // copy the training configuration and the learned theta onto the model
            var fitted = new LogisticRegressionModel();
            fitted.Descriptor = this.Descriptor;
            fitted.NormalizeFeatures = base.NormalizeFeatures;
            fitted.FeatureNormalizer = base.FeatureNormalizer;
            fitted.FeatureProperties = base.FeatureProperties;
            fitted.Theta = descent.Properties.Theta;
            fitted.LogisticFunction = this.LogisticFunction;
            fitted.PolynomialFeatures = this.PolynomialFeatures;

            return fitted;
        }
        /// <summary>
        /// Builds a Linear Regression model by fitting Theta to the given examples.
        /// </summary>
        /// <param name="X">The Matrix of examples to fit.</param>
        /// <param name="y">The Vector of target values.</param>
        /// <returns>The fitted <see cref="LinearRegressionModel"/>.</returns>
        public override IModel Generate(Matrix X, Vector y)
        {
            // preprocess the supplied matrix first; the copy below is taken afterwards
            this.Preprocess(X);

            // duplicate the matrix, then prepend the intercept column of ones
            Matrix duplicate = X.Copy();
            Matrix withIntercept = duplicate.Insert(Vector.Ones(duplicate.Rows), 0, VectorType.Col);

            // random initial theta, sized to the column count
            Vector startTheta = Vector.Rand(withIntercept.Cols);

            // configure and run gradient descent with an L2-regularised linear cost
            var descent = new numl.Math.Optimization.Optimizer(startTheta, this.MaxIterations, this.LearningRate);
            descent.CostFunction = new numl.Math.Functions.Cost.LinearCostFunction()
            {
                X = withIntercept,
                Y = y,
                Lambda = this.Lambda,
                Regularizer = new numl.Math.Functions.Regularization.L2Regularizer()
            };

            descent.Run();

            // copy the training configuration and the learned theta onto the model
            var fitted = new LinearRegressionModel();
            fitted.Descriptor = this.Descriptor;
            fitted.NormalizeFeatures = base.NormalizeFeatures;
            fitted.FeatureNormalizer = base.FeatureNormalizer;
            fitted.FeatureProperties = base.FeatureProperties;
            fitted.Theta = descent.Properties.Theta;

            return fitted;
        }
        /// <summary>
        /// Builds a new Collaborative Filtering model from a ratings matrix.
        /// </summary>
        /// <param name="X">Training matrix values (entities x ratings).</param>
        /// <param name="y">Vector of entity identifiers.</param>
        /// <returns>A <see cref="CofiRecommenderModel"/> with the learned ThetaX / ThetaY matrices, the per-row means and the index maps.</returns>
        public override IModel Generate(Matrix X, Vector y)
        {
            // initial preprocessing pass on a copy of the input
            // NOTE(review): looks like this exists so FeatureProperties is set before Average is assigned below - confirm
            this.Preprocess(X.Copy());

            // rows of X are entities with their ratings, y carries the entity ids.
            // Derive the rating range from the data when none has been set.
            if (this.Ratings == null)
            {
                this.Ratings = new Range(X.Min(), X.Max());
            }

            // indicator matrix built from the rating range test
            Matrix indicator = X.ToBinary(r => this.Ratings.Test(r));

            // row means restricted to values inside the rating range
            Vector means = X.GetRows()
                            .Select(ratings => ratings.Where(r => this.Ratings.Test(r)).Sum() /
                                               ratings.Where(r => this.Ratings.Test(r)).Count())
                            .ToVector();

            // averages must be in place before the features are preprocessed
            this.FeatureProperties.Average = means;

            this.Preprocess(X);

            // references (e.g. user ratings) are the columns, entities (movies / books, etc.) the rows
            int references = X.Cols;
            int entities = X.Rows;

            // initialise both parameter matrices with random values
            Matrix thetaEntities = Matrix.Rand(entities, this.CollaborativeFeatures, -1d);
            Matrix thetaReferences = Matrix.Rand(references, this.CollaborativeFeatures, -1d);

            numl.Math.Functions.Cost.ICostFunction cofiCost = new numl.Math.Functions.Cost.CofiCostFunction()
            {
                CollaborativeFeatures = this.CollaborativeFeatures,
                Lambda = this.Lambda,
                R = indicator,
                Regularizer = null,
                X = thetaEntities,
                Y = X.Unshape()
            };

            // two parameter sets are optimised at once, so join them into a single vector
            Vector joinedTheta = Vector.Combine(thetaEntities.Unshape(), thetaReferences.Unshape());

            var descent = new numl.Math.Optimization.Optimizer(joinedTheta, this.MaxIterations, this.LearningRate);
            descent.CostFunction = cofiCost;

            descent.Run();

            // pull the two optimised matrices back out of the joined vector
            Vector learned = descent.Properties.Theta;
            int entityEnd = (thetaEntities.Rows * thetaEntities.Cols) - 1;
            thetaEntities = learned.Slice(0, entityEnd).Reshape(entities, VectorType.Row);

            // the start offset is measured on the reshaped entity matrix
            int referenceStart = thetaEntities.Rows * thetaEntities.Cols;
            thetaReferences = learned.Slice(referenceStart, joinedTheta.Length - 1).Reshape(references, VectorType.Row);

            // index maps default to the original positions when none were provided
            var referenceMap = this.ReferenceFeatureMap;
            if (referenceMap == null)
            {
                referenceMap = Vector.Create(references, i => i);
            }
            this.ReferenceFeatureMap = referenceMap;

            var entityMap = this.EntityFeatureMap;
            if (entityMap == null)
            {
                entityMap = Vector.Create(entities, i => i);
            }
            this.EntityFeatureMap = entityMap;

            var recommender = new CofiRecommenderModel()
            {
                Descriptor = this.Descriptor,
                NormalizeFeatures = this.NormalizeFeatures,
                FeatureNormalizer = this.FeatureNormalizer,
                FeatureProperties = this.FeatureProperties,
                Ratings = this.Ratings,
                ReferenceFeatureMap = this.ReferenceFeatureMap,
                EntityFeatureMap = this.EntityFeatureMap,
                Mu = means,
                Y = y,
                Reference = X,
                ThetaX = thetaEntities,
                ThetaY = thetaReferences
            };

            return recommender;
        }