Example #1
        /// <summary>
        /// Compute gradients of `loss` for the variables in `var_list`.
        /// </summary>
        /// <param name="loss"></param>
        /// <param name="gate_gradients"></param>
        /// <returns>
        /// A list of (gradient, variable) pairs. Variable is always present, but
        /// gradient can be `None`.
        /// </returns>
        public List <KeyValuePair <object, object> > compute_gradients(Tensor loss,
                                                                       List <RefVariable> var_list      = null,
                                                                       int? aggregation_method          = null,
                                                                       GateGradientType gate_gradients  = GateGradientType.GATE_OP,
                                                                       bool colocate_gradients_with_ops = false,
                                                                       List <Tensor> grad_loss          = null)
        {
            int num_towers = 1;

            if (distribute_lib.get_loss_reduction() == VariableAggregationType.MEAN)
            {
                // TODO: when the loss reduction is MEAN and the distribution
                // strategy runs more than one tower, scale `loss` by 1/num_towers.
            }

            // Default to the trainable variables collected in the graph.
            if (var_list == null)
            {
                var tmp = variables.trainable_variables();

                switch (tmp)
                {
                case List <RefVariable> values:
                    var_list = values;
                    break;
                }
            }

            var processors = var_list.Select(v => optimizer._get_processor(v)).ToList();
            var var_refs   = processors.Select(x => x.target()).ToList();

            var grads = gradients_impl.gradients(loss, var_refs, grad_ys: grad_loss,
                                                 gate_gradients: (gate_gradients == GateGradientType.GATE_OP),
                                                 aggregation_method: aggregation_method,
                                                 colocate_gradients_with_ops: colocate_gradients_with_ops);

            // Pair each gradient with its variable; the gradient entry is `null`
            // for variables that `loss` does not depend on.
            return(grads.Zip(var_list, (grad, v) => new KeyValuePair <object, object>(grad, v)).ToList());
        }
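
A minimal usage sketch for the method above. The optimizer construction and the `loss` tensor are assumptions for illustration; `tf.train.GradientDescentOptimizer` stands in for any concrete `Optimizer` subclass exposing `compute_gradients`.

            // Hypothetical usage: list which variables actually receive gradients.
            var optimizer      = tf.train.GradientDescentOptimizer(0.01f);
            var grads_and_vars = optimizer.compute_gradients(loss);

            foreach (var pair in grads_and_vars)
            {
                // pair.Key is the gradient (possibly null), pair.Value the variable.
                if (pair.Key == null)
                    Console.WriteLine($"no gradient flows into {((RefVariable)pair.Value).name}");
            }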
Example #2
        /// <summary>
        /// Add operations to minimize `loss` by updating `var_list`
        /// </summary>
        /// <param name="loss"></param>
        /// <returns>
        /// An Operation that updates the variables in `var_list`.  If `global_step`
        /// was not `None`, that operation also increments `global_step`.
        /// </returns>
        public Operation minimize(Tensor loss,
                                  GateGradientType gate_gradients  = GateGradientType.GATE_OP,
                                  bool colocate_gradients_with_ops = false)
        {
            var grads_and_vars = compute_gradients(loss,
                                                   gate_gradients: gate_gradients,
                                                   colocate_gradients_with_ops: colocate_gradients_with_ops);

            return(apply_gradients(grads_and_vars));
        }
Example #3
        /// <summary>
        /// Add operations to minimize `loss` by updating `var_list`
        /// </summary>
        /// <param name="loss"></param>
        /// <returns></returns>
        public Optimizer minimize(Tensor loss,
                                  GateGradientType gate_gradients  = GateGradientType.GATE_OP,
                                  bool colocate_gradients_with_ops = false)
        {
            // Fluent variant: compute the gradients, then hand back the
            // optimizer itself so that calls can be chained.
            compute_gradients(loss,
                              gate_gradients: gate_gradients,
                              colocate_gradients_with_ops: colocate_gradients_with_ops);

            return(this);
        }
Example #4
        /// <summary>
        /// Add operations to minimize `loss` by updating `var_list`
        /// </summary>
        /// <param name="loss"></param>
        /// <returns>
        /// An Operation that updates the variables in `var_list`.  If `global_step`
        /// was not `None`, that operation also increments `global_step`.
        /// </returns>
        public Operation minimize(Tensor loss,
                                  GateGradientType gate_gradients  = GateGradientType.GATE_OP,
                                  bool colocate_gradients_with_ops = false)
        {
            var grads_and_vars = compute_gradients(loss,
                                                   gate_gradients: gate_gradients,
                                                   colocate_gradients_with_ops: colocate_gradients_with_ops);

            var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();

            if (vars_with_grad.Length == 0)
            {
                throw new ValueError($"No gradients provided for any variable, check your graph for ops" +
                                     $" that do not support gradients, between variables {string.Join(",", vars_with_grad.Select(x => x.name))} and loss {loss}.");
            }

            return(apply_gradients(grads_and_vars));
        }
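
A usage sketch for this overload. The optimizer type and `loss` are illustrative assumptions; any `Optimizer` subclass exposing this `minimize` would work the same way.

            // Hypothetical usage: build the training op once, then run it per step.
            var optimizer = tf.train.GradientDescentOptimizer(0.01f);
            var train_op  = optimizer.minimize(loss);
            // `train_op` is then executed once per training step inside a session.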
Example #5
        /// <summary>
        /// Add operations to minimize `loss` by updating `var_list`
        ///
        ///  This method simply combines calls to `compute_gradients()` and
        ///  `apply_gradients()`. If you want to process the gradients before applying
        ///  them, call `compute_gradients()` and `apply_gradients()` explicitly instead
        ///  of using this function.
        /// </summary>
        /// <param name="loss">A `Tensor` containing the value to minimize.</param>
        /// <param name="global_step">Optional `Variable` to increment by one after the
        /// variables have been updated.</param>
        /// <param name="var_list">Optional list or tuple of `Variable` objects to update to
        /// minimize `loss`.  Defaults to the list of variables collected in
        /// the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.</param>
        /// <param name="gate_gradients">
        /// How to gate the computation of gradients.  Can be
        /// `GATE_NONE`, `GATE_OP`, or  `GATE_GRAPH`.
        /// </param>
        /// <param name="aggregation_method">
        /// Specifies the method used to combine gradient terms.
        /// Valid values are defined in the class `AggregationMethod`.
        /// </param>
        /// <param name="colocate_gradients_with_ops"></param>
        /// <param name="name">Optional name for the returned operation.</param>
        /// <param name="grad_loss">Optional. A `Tensor` holding the gradient computed for `loss`.</param>
        /// <returns>
        /// An Operation that updates the variables in `var_list`.  If `global_step`
        /// was not `None`, that operation also increments `global_step`.
        /// </returns>
        public Operation minimize(Tensor loss,
                                  RefVariable global_step          = null,
                                  List <RefVariable> var_list      = null,
                                  GateGradientType gate_gradients  = GateGradientType.GATE_OP,
                                  int? aggregation_method          = null,
                                  bool colocate_gradients_with_ops = false,
                                  string name                      = null,
                                  Tensor grad_loss                 = null)
        {
            // TODO: strongly type aggregation_method
            var grads_and_vars = compute_gradients(loss, var_list: var_list,
                                                   gate_gradients: gate_gradients,
                                                   aggregation_method: aggregation_method,
                                                   colocate_gradients_with_ops: colocate_gradients_with_ops,
                                                   grad_loss: grad_loss);

            var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();

            if (vars_with_grad.Length == 0)
            {
                throw new ValueError($"No gradients provided for any variable, check your graph for ops" +
                                     $" that do not support gradients, between variables {string.Join(",", vars_with_grad.Select(x => x.name))} and loss {loss}.");
            }

            return(apply_gradients(grads_and_vars, global_step: global_step, name: name));
        }
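
The summary above recommends splitting `compute_gradients()` and `apply_gradients()` when the gradients need processing in between. A sketch of that split follows; the optimizer type, `tf.clip_by_norm`, the exact `apply_gradients` overload, and the in-scope `global_step` variable are assumptions for illustration.

            // Hypothetical manual split: clip every gradient before applying it.
            var optimizer      = tf.train.GradientDescentOptimizer(0.01f);
            var grads_and_vars = optimizer.compute_gradients(loss);

            // Clip each non-null gradient to a maximum norm of 5.0,
            // leaving null gradients (unconnected variables) untouched.
            var capped = grads_and_vars
                         .Select(x => (x.Item1 == null ? null : tf.clip_by_norm(x.Item1, 5.0f), x.Item2))
                         .ToList();

            var train_op = optimizer.apply_gradients(capped, global_step: global_step);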