protected override IVariableV1 add_weight(string name,
    TensorShape shape = null,
    TF_DataType dtype = TF_DataType.TF_FLOAT,
    IInitializer initializer = null,
    IRegularizer regularizer = null,
    VariableSynchronization synchronization = VariableSynchronization.OnRead,
    VariableAggregation aggregation = VariableAggregation.Sum,
    bool trainable = true,
    Func<VariableArgs, IVariableV1> getter = null)
{
    // Default to a scalar weight when no shape is given.
    if (shape == null)
        shape = new TensorShape(new int[0]);

    return tf_with(ops.init_scope(), delegate
    {
        // State variables are created inside init_scope and are always
        // non-trainable; the trainable, regularizer and getter arguments
        // are ignored by this override.
        return base.add_weight(name, shape,
            dtype: dtype,
            trainable: false,
            initializer: initializer,
            synchronization: synchronization,
            aggregation: aggregation);
    });
}
/// <summary>
/// Performs gradient descent to optimize the theta parameters.
/// </summary>
/// <param name="theta">Initial theta (zeros)</param>
/// <param name="x">Training set</param>
/// <param name="y">Training labels</param>
/// <param name="maxIterations">Maximum number of iterations to run gradient descent</param>
/// <param name="learningRateAlpha">The learning rate (alpha)</param>
/// <param name="costFunction">Cost function to use for gradient descent</param>
/// <param name="lambda">The regularization constant to apply</param>
/// <param name="regularizer">The regularization function to apply</param>
/// <returns>The best cost found and the theta that produced it.</returns>
public static Tuple<double, Vector> Run(Vector theta, Matrix x, Vector y, int maxIterations,
    double learningRateAlpha, ICostFunction costFunction, double lambda, IRegularizer regularizer)
{
    var bestTheta = theta.Copy();
    var bestCost = double.PositiveInfinity;

    for (var i = 0; i < maxIterations; i++)
    {
        var currentCost = costFunction.ComputeCost(bestTheta, x, y, lambda, regularizer);
        var currentGradient = costFunction.ComputeGradient(bestTheta, x, y, lambda, regularizer);

        if (currentCost < bestCost)
        {
            // Cost improved: take a gradient step and record the new best.
            bestTheta = bestTheta - learningRateAlpha * currentGradient;
            bestCost = currentCost;
        }
        else
        {
            // Cost got worse: decay the learning rate and retry from the best theta.
            learningRateAlpha = learningRateAlpha * 0.99;
        }
    }

    return new Tuple<double, Vector>(bestCost, bestTheta);
}
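A minimal usage sketch of the method above. Only the Run signature comes from the snippet itself; the GradientDescent class name, the LinearCostFunction and L2Regularizer types, and the Matrix.Rand/Vector.Rand helpers are placeholders for whatever implementations the library provides.

// Hypothetical driver; all type names other than Run's signature are placeholders.
Matrix x = Matrix.Rand(100, 3);         // 100 examples, 3 features
Vector y = Vector.Rand(100);            // matching labels
Vector theta = Vector.Zeros(x.Cols);    // start from zeros, as the docs suggest

ICostFunction costFunction = new LinearCostFunction();  // placeholder name
IRegularizer regularizer = new L2Regularizer();         // placeholder name

var result = GradientDescent.Run(theta, x, y,
    maxIterations: 400,
    learningRateAlpha: 0.01,
    costFunction: costFunction,
    lambda: 1.0,
    regularizer: regularizer);

double bestCost = result.Item1;    // lowest cost observed
Vector bestTheta = result.Item2;   // parameters that achieved it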
/// <summary>
/// Create lambdas which compute regularization losses.
/// </summary>
/// <param name="name">Base name for the loss's name scope.</param>
/// <param name="variable">The weight variable being regularized.</param>
/// <param name="regularizer">The regularizer to apply to the variable.</param>
void _handle_weight_regularization(string name, IVariableV1 variable, IRegularizer regularizer)
{
    // Register a deferred loss: the lambda runs each time losses are collected,
    // evaluating the regularizer on the variable's current value.
    add_loss(() => tf_with(ops.name_scope(name + "/Regularizer"), scope =>
        regularizer.Apply(new RegularizerArgs(variable.AsTensor()))));
}
/// <summary>
/// Creates a 2D convolution layer (spatial convolution over images).
/// </summary>
/// <param name="filters">Number of output filters in the convolution.</param>
/// <param name="kernel_size">Height and width of the convolution window.</param>
/// <param name="strides">Strides of the convolution along height and width.</param>
/// <param name="padding">"valid" or "same".</param>
/// <param name="data_format">Ordering of the dimensions in the inputs.</param>
/// <param name="dilation_rate">Dilation rate for dilated convolution.</param>
/// <param name="groups">Number of groups the input is split into along the channel axis.</param>
/// <param name="activation">Activation function to use (tf.keras.activations).</param>
/// <param name="use_bias">Whether the layer uses a bias vector.</param>
/// <param name="kernel_initializer">Initializer for the kernel weights.</param>
/// <param name="bias_initializer">Initializer for the bias vector.</param>
/// <param name="kernel_regularizer">Regularizer applied to the kernel weights.</param>
/// <param name="bias_regularizer">Regularizer applied to the bias vector.</param>
/// <param name="activity_regularizer">Regularizer applied to the layer's output.</param>
/// <returns>The configured Conv2D layer.</returns>
public Conv2D Conv2D(int filters,
    TensorShape kernel_size = null,
    TensorShape strides = null,
    string padding = "valid",
    string data_format = null,
    TensorShape dilation_rate = null,
    int groups = 1,
    Activation activation = null,
    bool use_bias = true,
    IInitializer kernel_initializer = null,
    IInitializer bias_initializer = null,
    IRegularizer kernel_regularizer = null,
    IRegularizer bias_regularizer = null,
    IRegularizer activity_regularizer = null)
        => new Conv2D(new Conv2DArgs
        {
            // The snippet was cut off at this point in the source; the initializer
            // presumably forwards each parameter to the matching Conv2DArgs property
            // (Filters = filters, KernelSize = kernel_size, and so on).
        });
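A hedged usage sketch of this factory. The keras.layers accessor and the keras.regularizers.l2 helper are assumptions about the surrounding TensorFlow.NET-style API; substitute whatever IRegularizer implementation your version exposes.

// Sketch only: regularizer construction varies by version.
var l2 = keras.regularizers.l2(0.01f);   // assumed helper returning an IRegularizer

var conv = keras.layers.Conv2D(32,
    kernel_size: new TensorShape(3, 3),
    padding: "same",
    kernel_regularizer: l2);             // weight penalty on the kernel only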
/// <summary>
/// Compute the error cost of the given theta parameters for the training and label sets.
/// </summary>
/// <param name="theta">Learning theta parameters</param>
/// <param name="X">Training set</param>
/// <param name="y">Training labels</param>
/// <param name="lambda">Regularization constant</param>
/// <param name="regularizer">Regularization term function.</param>
/// <returns>The (optionally regularized) least-squares cost.</returns>
public double ComputeCost(Vector theta, Matrix X, Vector y, double lambda, IRegularizer regularizer)
{
    int m = X.Rows;

    Vector s = (X * theta).ToVector(); // hypothesis h = X * theta

    double j = 1.0 / (2.0 * m) * ((s - y) ^ 2.0).Sum(); // halved mean squared error

    if (lambda != 0)
        j = regularizer.Regularize(j, theta, m, lambda); // add the regularization term

    return j;
}
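In math form, the cost computed above is the regularized least-squares cost; the L2 penalty shown here is one concrete choice for the abstracted Regularize call:

$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(x^{(i)}\theta - y^{(i)}\right)^{2} \;+\; \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_{j}^{2}$$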
/// <summary>
/// Compute the error gradient of the given theta parameters for the training and label sets.
/// </summary>
/// <param name="theta">Learning theta parameters</param>
/// <param name="X">Training set</param>
/// <param name="y">Training labels</param>
/// <param name="lambda">Regularization constant</param>
/// <param name="regularizer">Regularization term function.</param>
/// <returns>The (optionally regularized) gradient vector.</returns>
public Vector ComputeGradient(Vector theta, Matrix X, Vector y, double lambda, IRegularizer regularizer)
{
    int m = X.Rows;
    Vector gradient = Vector.Zeros(theta.Length);
    Vector s = (X * theta).ToVector(); // hypothesis h = X * theta

    // Partial derivative of the least-squares cost with respect to each theta[i].
    for (int i = 0; i < theta.Length; i++)
        gradient[i] = 1.0 / m * ((s - y) * X[i, VectorType.Col]).Sum();

    if (lambda != 0)
        gradient = regularizer.Regularize(theta, gradient, m, lambda);

    return gradient;
}
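The loop above computes the standard least-squares gradient, one component per theta entry; in math form (before the regularizer's contribution):

$$\frac{\partial J}{\partial \theta_{j}} = \frac{1}{m}\sum_{i=1}^{m}\left(x^{(i)}\theta - y^{(i)}\right)x_{j}^{(i)}$$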
/// <summary>
/// Compute the error cost of the given theta parameters for the training and label sets.
/// </summary>
/// <param name="theta">Learning theta parameters</param>
/// <param name="X">Training set</param>
/// <param name="y">Training labels</param>
/// <param name="lambda">Regularization constant</param>
/// <param name="regularizer">Regularization term function.</param>
/// <returns>The (optionally regularized) cross-entropy cost.</returns>
public double ComputeCost(Vector theta, Matrix X, Vector y, double lambda, IRegularizer regularizer)
{
    int m = X.Rows;

    Vector s = (X * theta).ToVector();
    IFunction function = new Logistic();
    s = s.Each(v => function.Compute(v)); // h = sigmoid(X * theta)

    // log(1 - h); Abs guards against tiny negative arguments from rounding.
    Vector slog = s.Copy().Each(v => System.Math.Log(System.Math.Abs(1.0 - v)));

    // Cross-entropy: J = -(1/m) * [y . log(h) + (1 - y) . log(1 - h)].
    // The (1 - y) term must be added, not subtracted, or the cost can go negative.
    double j = (-1.0 / m) * (y.Dot(s.Log()) + (1.0 - y).Dot(slog));

    if (lambda != 0)
        j = regularizer.Regularize(j, theta, m, lambda);

    return j;
}
/// <summary>
/// Compute the error gradient of the given theta parameters for the training and label sets.
/// </summary>
/// <param name="theta">Learning theta parameters</param>
/// <param name="X">Training set</param>
/// <param name="y">Training labels</param>
/// <param name="lambda">Regularization constant</param>
/// <param name="regularizer">Regularization term function.</param>
/// <returns>The (optionally regularized) gradient vector.</returns>
public Vector ComputeGradient(Vector theta, Matrix X, Vector y, double lambda, IRegularizer regularizer)
{
    int m = X.Rows;
    Vector gradient = Vector.Zeros(theta.Length);

    Vector s = (X * theta).ToVector();
    IFunction function = new Logistic();
    s = s.Each(v => function.Compute(v)); // h = sigmoid(X * theta)

    for (int i = 0; i < theta.Length; i++)
        gradient[i] = (1.0 / m) * ((s - y) * X[i, VectorType.Col]).Sum();

    if (lambda != 0)
        gradient = regularizer.Regularize(theta, gradient, m, lambda);

    return gradient;
}
/// <summary>
/// Compute the error cost of the given theta parameters for the training and label sets.
/// </summary>
/// <param name="theta">Learning theta parameters</param>
/// <param name="X">Training set</param>
/// <param name="y">Training labels</param>
/// <param name="lambda">Regularization constant</param>
/// <param name="regularizer">Regularization term function.</param>
/// <returns>The (optionally regularized) cross-entropy cost.</returns>
public double ComputeCost(Vector theta, Matrix X, Vector y, double lambda, IRegularizer regularizer)
{
    var m = X.Rows;

    var s = (X * theta).ToVector();
    IFunction function = new Logistic();
    s = s.Each(v => function.Compute(v)); // h = sigmoid(X * theta)

    var slog = s.Copy().Each(v => Math.Log(Math.Abs(1.0 - v))); // log(1 - h)

    // Cross-entropy; as above, the (1 - y) term is added, not subtracted.
    var j = (-1.0 / m) * (y.Dot(s.Log()) + (1.0 - y).Dot(slog));

    if (lambda != 0)
        j = regularizer.Regularize(j, theta, m, lambda);

    return j;
}
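Both logistic ComputeCost variants above implement the cross-entropy cost for logistic regression, with regularizer.Regularize adding the penalty term when lambda is nonzero:

$$J(\theta) = -\frac{1}{m}\left[\, y^{\top}\log h \;+\; (1-y)^{\top}\log(1-h) \,\right], \qquad h = \sigma(X\theta) = \frac{1}{1+e^{-X\theta}}$$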
public void Regularize(IRegularizer regularizer)
{
    regularizer.Regularize(Weights, WeightsGradient);
}
public void Regularize(IRegularizer regularizer)
{
    regularizer.Regularize(Filters, FiltersGradient);
}
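The two call sites above pass a parameter tensor and its gradient. A minimal sketch of what such a two-argument regularizer might look like follows; the L2Regularizer name and the double[] parameter type are assumptions (the snippets don't show the interface definition), and this is L2 weight decay as one plausible choice, not this library's actual implementation.

// Hypothetical L2 (weight-decay) regularizer matching the two-argument
// Regularize(weights, gradients) call shape used above.
public class L2Regularizer // : IRegularizer, assuming the interface declares this overload
{
    private readonly double lambda;

    public L2Regularizer(double lambda)
    {
        this.lambda = lambda;
    }

    public void Regularize(double[] weights, double[] gradients)
    {
        // d/dw of (lambda / 2) * ||w||^2 adds lambda * w to each gradient entry.
        for (int i = 0; i < weights.Length; i++)
            gradients[i] += lambda * weights[i];
    }
}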
/// <summary>
/// Create lambdas which compute regularization losses.
/// </summary>
/// <param name="name">Base name for the loss (unused in this variant).</param>
/// <param name="variable">The weight variable to regularize (unused in this variant).</param>
/// <param name="regularizer">The regularizer to apply.</param>
void _handle_weight_regularization(string name, IVariableV1 variable, IRegularizer regularizer)
{
    // Unlike the variant above, this version neither opens a name scope
    // nor passes the variable's tensor into the RegularizerArgs.
    add_loss(() => regularizer.Apply(new RegularizerArgs()));
}
/// <summary>
/// Compute the error gradient of the given theta parameters for the training and label sets.
/// </summary>
/// <param name="theta">Learning theta parameters</param>
/// <param name="X">Training set</param>
/// <param name="y">Training labels</param>
/// <param name="lambda">Regularization constant</param>
/// <param name="regularizer">Regularization term function.</param>
/// <returns>The (optionally regularized) gradient vector.</returns>
public Vector ComputeGradient(Vector theta, Matrix X, Vector y, double lambda, IRegularizer regularizer)
{
    var m = X.Rows;
    var gradient = Vector.Zeros(theta.Length);

    var s = (X * theta).ToVector();
    IFunction function = new Logistic();
    s = s.Each(v => function.Compute(v)); // h = sigmoid(X * theta)

    for (var i = 0; i < theta.Length; i++)
        gradient[i] = (1.0 / m) * ((s - y) * X[i, VectorType.Col]).Sum();

    if (lambda != 0)
        gradient = regularizer.Regularize(theta, gradient, m, lambda);

    return gradient;
}
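Note that the logistic gradient has exactly the same form as the linear one; only the hypothesis changes to the sigmoid:

$$\frac{\partial J}{\partial \theta_{j}} = \frac{1}{m}\sum_{i=1}^{m}\left(h^{(i)} - y^{(i)}\right)x_{j}^{(i)}, \qquad h^{(i)} = \sigma\!\left(x^{(i)}\theta\right)$$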
protected virtual IVariableV1 add_weight(string name,
    Shape shape,
    TF_DataType dtype = TF_DataType.TF_FLOAT,
    IInitializer initializer = null,
    IRegularizer regularizer = null,
    VariableSynchronization synchronization = VariableSynchronization.Auto,
    VariableAggregation aggregation = VariableAggregation.None,
    bool trainable = true,
    Func<VariableArgs, IVariableV1> getter = null)
{
    // Initialize variable when no initializer provided
    if (initializer == null)
    {
        // If dtype is DT_FLOAT, provide a uniform unit scaling initializer
        if (dtype.is_floating())
            initializer = tf.glorot_uniform_initializer;
        else if (dtype.is_integer())
            initializer = tf.zeros_initializer;
        else
            throw new ValueError($"An initializer for variable {name} of type {dtype.as_base_dtype()} is required for layer {name}");
    }

    // Variables synchronized on read are never trained directly.
    if (synchronization == VariableSynchronization.OnRead)
        trainable = false;

    var args = new VariableArgs
    {
        Name = name,
        Shape = shape,
        DType = dtype,
        Getter = getter ?? base_layer_utils.make_variable,
        Overwrite = true,
        Initializer = initializer,
        Synchronization = synchronization,
        Aggregation = aggregation,
        Trainable = trainable
    };
    var variable = _add_variable_with_custom_getter(args);

    // Register a deferred regularization loss for this weight.
    if (regularizer != null)
    {
        var name_in_scope = variable.Name.Split(':')[0];
        _handle_weight_regularization(name_in_scope, variable, regularizer);
    }

    //backend.track_variable(variable);
    if (trainable)
        trainable_weights.Add(variable);
    else
        non_trainable_weights.Add(variable);

    return variable;
}
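A sketch of how a Layer subclass might call this during its build step. Only the add_weight signature above is taken from the source; inputDim, units and myRegularizer are placeholders.

// Inside a Layer subclass (sketch; surrounding plumbing omitted):
var kernel = add_weight("kernel",
    new Shape(inputDim, units),                 // placeholder dimensions
    dtype: TF_DataType.TF_FLOAT,
    initializer: tf.glorot_uniform_initializer, // same default the method would pick
    regularizer: myRegularizer,                 // non-null, so a loss lambda is registered
    trainable: true);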
public NeuralNetwork UseRegularizer(IRegularizer regularizer)
{
    Regularizer = regularizer;
    return this; // fluent: allows chained configuration calls
}
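Typical fluent configuration; the L2Regularizer type here is a placeholder for any concrete IRegularizer implementation.

var network = new NeuralNetwork()
    .UseRegularizer(new L2Regularizer(0.01)); // placeholder regularizer type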