Example #1
 protected override void build(TensorShape input_shape)
 {
     embeddings = add_weight(shape: new int[] { input_dim, output_dim },
                             initializer: embeddings_initializer,
                             name: "embeddings");
     built = true;
 }
Example #2
 public override Operation _apply_sparse(IndexedSlices grad, RefVariable var)
 {
     return(_apply_sparse_shared(grad.values, var, grad.indices, (x, i, v) =>
     {
         return state_ops.scatter_add(x, i, v, use_locking: _use_locking);
     }));
 }
Example #3
        public _InitializeClustersOpFactory(Tensor[] inputs,
                                            Tensor num_clusters,
                                            string initial_clusters,
                                            string distance_metric,
                                            int random_seed,
                                            int kmeans_plus_plus_num_retries,
                                            int kmc2_chain_length,
                                            RefVariable cluster_centers,
                                            RefVariable cluster_centers_updated,
                                            RefVariable cluster_centers_initialized)
        {
            _inputs                       = inputs;
            _num_clusters                 = num_clusters;
            _initial_clusters             = initial_clusters;
            _distance_metric              = distance_metric;
            _random_seed                  = random_seed;
            _kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries;
            _kmc2_chain_length            = kmc2_chain_length;
            _cluster_centers              = cluster_centers;
            _cluster_centers_updated      = cluster_centers_updated;
            _cluster_centers_initialized  = cluster_centers_initialized;

            _num_selected  = array_ops.shape(_cluster_centers).slice(0);
            _num_remaining = _num_clusters - _num_selected;

            _num_data = math_ops.add_n(_inputs.Select(i => array_ops.shape(i).slice(0)).ToArray());
        }
Example #4
        protected override void build(TensorShape input_shape)
        {
            var last_dim = input_shape.dims.Last();
            var axes     = new Dictionary <int, int>();

            axes[-1]   = last_dim;
            input_spec = new InputSpec(min_ndim: 2, axes: axes);
            kernel     = add_weight(
                "kernel",
                shape: new int[] { last_dim, units },
                initializer: kernel_initializer,
                dtype: _dtype,
                trainable: true);
            if (use_bias)
            {
                bias = add_weight(
                    "bias",
                    shape: new int[] { units },
                    initializer: bias_initializer,
                    dtype: _dtype,
                    trainable: true);
            }

            built = true;
        }
Example #5
        public VdCnn(int alphabet_size, int document_max_len, int num_class)
        {
            embedding_size  = 16;
            filter_sizes    = new int[] { 3, 3, 3, 3, 3 };
            num_filters     = new int[] { 64, 64, 128, 256, 512 };
            num_blocks      = new int[] { 2, 2, 2, 2 };
            learning_rate   = 0.001f;
            cnn_initializer = tf.keras.initializers.he_normal();
            x           = tf.placeholder(tf.int32, new TensorShape(-1, document_max_len), name: "x");
            y           = tf.placeholder(tf.int32, new TensorShape(-1), name: "y");
            is_training = tf.placeholder(tf.@bool, new TensorShape(), name: "is_training");
            global_step = tf.Variable(0, trainable: false);

            // Embedding Layer
            with(tf.name_scope("embedding"), delegate
            {
                var init_embeddings = tf.random_uniform(new int[] { alphabet_size, embedding_size }, -1.0f, 1.0f);
                embeddings          = tf.get_variable("embeddings", initializer: init_embeddings);
                x_emb      = tf.nn.embedding_lookup(embeddings, x);
                x_expanded = tf.expand_dims(x_emb, -1);
            });

            // First Convolution Layer
            with(tf.variable_scope("conv-0"), delegate
            {
                var conv0 = tf.layers.conv2d(x_expanded,
                                             filters: num_filters[0],
                                             kernel_size: new int[] { filter_sizes[0], embedding_size },
                                             kernel_initializer: cnn_initializer,
                                             activation: tf.nn.relu());
            });
        }
Example #6
        private RefVariable[] _create_variables(Tensor num_clusters)
        {
            var         init_value                  = constant_op.constant(new float[0], dtype: TF_DataType.TF_FLOAT);
            var         cluster_centers             = tf.Variable(init_value, name: CLUSTERS_VAR_NAME, validate_shape: false);
            var         cluster_centers_initialized = tf.Variable(false, dtype: TF_DataType.TF_BOOL, name: "initialized");
            RefVariable update_in_steps             = null;

            if (_use_mini_batch && _mini_batch_steps_per_iteration > 1)
            {
                throw new NotImplementedException("KMeans._create_variables");
            }
            else
            {
                var cluster_centers_updated = cluster_centers;
                var ones           = array_ops.ones(new Tensor[] { num_clusters }, dtype: TF_DataType.TF_INT64);
                var cluster_counts = _use_mini_batch ? tf.Variable(ones) : null;
                return(new RefVariable[]
                {
                    cluster_centers,
                    cluster_centers_initialized,
                    cluster_counts,
                    cluster_centers_updated,
                    update_in_steps
                });
            }
        }
Example #7
        private Operation _apply_sparse_shared(Tensor grad, RefVariable var, Tensor indices, Func <RefVariable, Tensor, Tensor, Tensor> scatter_add)
        {
            var(beta1_power_v, beta2_power_v) = _get_beta_accumulators();
            Tensor beta1_power       = math_ops.cast(beta1_power_v, var.dtype.as_base_dtype());
            Tensor beta2_power       = math_ops.cast(beta2_power_v, var.dtype.as_base_dtype());
            var    lr_t              = math_ops.cast(_lr_t, var.dtype.as_base_dtype());
            var    beta1_t           = math_ops.cast(_beta1_t, var.dtype.as_base_dtype());
            var    beta2_t           = math_ops.cast(_beta2_t, var.dtype.as_base_dtype());
            var    epsilon_t         = math_ops.cast(_epsilon_t, var.dtype.as_base_dtype());
            var    lr                = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power));
            var    m                 = get_slot(var, "m");
            var    m_scaled_g_values = grad * (1 - beta1_t);
            var    m_t               = state_ops.assign(m, m * beta1_t, use_locking: _use_locking);

            with(ops.control_dependencies(new[] { m_t }), delegate
            {
                m_t = scatter_add(m, indices, m_scaled_g_values);
            });

            var v = get_slot(var, "v");
            var v_scaled_g_values = (grad * grad) * (1 - beta2_t);
            var v_t = state_ops.assign(v, v * beta2_t, use_locking: _use_locking);

            with(ops.control_dependencies(new[] { v_t }), delegate
            {
                v_t = scatter_add(v, indices, v_scaled_g_values);
            });
            var v_sqrt     = math_ops.sqrt(v_t);
            var var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking: _use_locking);

            return(control_flow_ops.group(new[] { var_update, m_t, v_t }));
        }
Example #8
        public Tensor __call__(RefVariable step)
        {
            return(tf_with(ops.name_scope(name ?? "PolynomialDecay"), scope =>
            {
                name = scope;
                var initial_learning_rate_tensor = ops.convert_to_tensor(initial_learning_rate, name: "initial_learning_rate");
                var dtype = initial_learning_rate_tensor.dtype;
                var end_learning_rate_tensor = math_ops.cast(end_learning_rate, dtype);
                var power_tensor = math_ops.cast(power, dtype);

                var global_step_recomp = math_ops.cast(step, dtype);
                var decay_steps_recomp = math_ops.cast(decay_steps, dtype);

                if (cycle)
                {
                    throw new NotImplementedException("PolynomialDecay cycle");
                }
                else
                {
                    // Make sure that the global_step used is not bigger than decay_steps.
                    global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps);
                }

                var p = tf.divide(global_step_recomp, decay_steps_recomp);
                var pow = tf.pow(1 - p, power_tensor);
                var m = math_ops.multiply(initial_learning_rate_tensor - end_learning_rate_tensor, pow);
                return math_ops.add(m,
                                    end_learning_rate_tensor,
                                    name: name);
            }));
        }
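Ignoring the unimplemented cycle branch, the schedule above evaluates to lr(step) = (initial_lr - end_lr) * (1 - min(step, decay_steps) / decay_steps)^power + end_lr. A minimal scalar sketch of that arithmetic (plain C# with `using System;`; the helper name is illustrative, not library API):

 // Sketch: the scalar math behind PolynomialDecay.__call__, cycle == false.
 public static float PolynomialDecayValue(float initialLr, float endLr,
                                          float step, float decaySteps, float power)
 {
     var clamped = Math.Min(step, decaySteps);     // keep step within decay_steps
     var p       = clamped / decaySteps;           // fraction of the decay completed
     var scale   = (float)Math.Pow(1f - p, power); // (1 - p)^power
     return (initialLr - endLr) * scale + endLr;   // interpolate toward end_lr
 }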
Example #9
 public Tensor _assign_moving_average(RefVariable variable, Tensor value, Tensor momentum)
 {
     return(tf_with(ops.name_scope(null, "AssignMovingAvg", new { variable, value, momentum }), scope =>
     {
         // var cm = ops.colocate_with(variable);
         var decay = ops.convert_to_tensor(1.0f - momentum, name: "decay");
         var update_delta = (variable - math_ops.cast(value, variable.dtype)) * decay;
         return state_ops.assign_sub(variable, update_delta, name: scope);
     }));
 }
Example #10
        /// <summary>
        /// Create a slot initialized to the given value.
        /// </summary>
        /// <param name="primary"></param>
        /// <param name="val"></param>
        /// <param name="name"></param>
        /// <param name="colocate_with_primary"></param>
        /// <returns></returns>
        public RefVariable create_slot(RefVariable primary, Tensor val, string name, bool colocate_with_primary = true)
        {
            var validate_shape = val.TensorShape.is_fully_defined();
            var prefix         = primary.Op.name;

            return(tf_with(tf.variable_scope(name: null, prefix + "/" + name), delegate
            {
                return _create_slot_var(primary, val, "", validate_shape, null, TF_DataType.DtInvalid);
            }));
        }
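A hedged usage sketch for the method above; `slot_creator` stands in for whatever instance defines create_slot, and the variable names are illustrative:

 // Sketch: create a slot seeded with an explicit value tensor.
 var primary = tf.Variable(1.0f, name: "weight");
 var val     = tf.constant(0.0f);
 var slot    = slot_creator.create_slot(primary, val, "accumulator");
 // The slot variable is scoped under "<primary.Op.name>/accumulator".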
Example #11
        /// <summary>
        /// Creates a slot initialized using an `Initializer`.
        /// </summary>
        /// <returns></returns>
        public RefVariable create_slot_with_initializer(RefVariable primary, IInitializer initializer, TensorShape shape,
                                                        TF_DataType dtype, string name, bool colocate_with_primary = true)
        {
            var validate_shape = shape.is_fully_defined();
            var prefix         = primary.Op.name;

            return(tf_with(new variable_scope(string.Empty, prefix + "/" + name), delegate
            {
                return _create_slot_var(primary, initializer, "", validate_shape, shape, dtype);
            }));
        }
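The same idea with an initializer instead of a concrete value; a hedged sketch reusing `primary` and `slot_creator` from the previous snippet:

 // Sketch: an initializer-backed slot, as an optimizer might allocate.
 var m = slot_creator.create_slot_with_initializer(
     primary, new Zeros(), primary.shape, primary.dtype, "m");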
Example #12
 public static Tensor Relu(Tensor x)
 {
     using (tf.name_scope("relu"))
     {
         int[]       w_shape = { x.shape[1], 1 };
         RefVariable w       = tf.Variable(tf.random_normal(w_shape), name: "weights");
         RefVariable b       = tf.Variable(0.0f, name: "bias");
         Tensor      z       = tf.add(tf.matmul(x, w), b);
         return(tf.maximum(z, 0, "relu"));
     }
 }
Example #13
 public Tensor __call__(Tensor inp, RefVariable filter)
 {
     return(conv_op(new
     {
         input = inp,
         filter,
         strides,
         padding,
         data_format,
         name
     }));
 }
Example #14
 public Tensor __call__(Tensor inp, RefVariable filter)
 {
     return(conv_op(new Conv2dParams
     {
         Input = inp,
         Filter = filter,
         Strides = strides,
         Padding = padding,
         DataFormat = data_format,
         Name = name
     }));
 }
Example #15
        public static Tensor apply_gradient_descent(RefVariable var, Tensor alpha, Tensor delta, bool use_locking = false, string name = null)
        {
            var _op = _op_def_lib._apply_op_helper("ApplyGradientDescent", name, new
            {
                var,
                alpha,
                delta,
                use_locking
            });

            return(_op.outputs[0]);
        }
Example #16
        public ILayer __build__(TensorShape input_shape, int seed = 1, float stddev = -1f)
        {
            Console.WriteLine("Building Layer \"" + name + "\" ...");
            if (stddev == -1)
            {
                stddev = (float)(1 / Math.Sqrt(2));
            }
            var dim       = input_shape.dims;
            var input_dim = dim[dim.Length - 1];

            W      = tf.Variable(create_tensor(new int[] { input_dim, units }, seed: seed, stddev: (float)stddev));
            WShape = new TensorShape(W.shape);
            return(this);
        }
Example #17
        /// <summary>
        /// Compute the moving average of a variable.
        /// </summary>
        /// <param name="variable"></param>
        /// <param name="value"></param>
        /// <param name="decay"></param>
        /// <param name="zero_debias"></param>
        /// <param name="name"></param>
        /// <returns></returns>
        public static Tensor assign_moving_average(RefVariable variable, RefVariable value, Tensor decay,
                                                   bool zero_debias = true, string name = null)
        {
            return(tf_with(ops.name_scope(name, "AssignMovingAvg", new { variable, value, decay }), scope =>
            {
                decay = ops.convert_to_tensor(1.0f - decay, name: "decay");
                if (decay.dtype != variable.dtype.as_base_dtype())
                {
                    decay = math_ops.cast(decay, variable.dtype.as_base_dtype());
                }

                return state_ops.assign_sub(variable, (variable - value) * decay, name: scope);
            }));
        }
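For intuition, the assign_sub above is algebraically the usual exponential moving average, variable = decay * variable + (1 - decay) * value. A one-step numeric check on plain floats (illustrative only):

 // variable -= (variable - value) * (1 - decay)
 //          ==  decay * variable + (1 - decay) * value
 float variable = 10f, value = 0f, decay = 0.9f;
 variable -= (variable - value) * (1f - decay);   // 10 - 10 * 0.1 = 9.0f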
Example #18
        protected override void build(TensorShape input_shape)
        {
            int channel_axis = data_format == "channels_first" ? 1 : -1;
            int input_dim    = channel_axis < 0 ?
                               input_shape.dims[input_shape.ndim + channel_axis] :
                               input_shape.dims[channel_axis];
            var kernel_shape = new int[] { kernel_size[0], kernel_size[1], input_dim, filters };

            kernel = add_weight(name: "kernel",
                                shape: kernel_shape,
                                initializer: kernel_initializer,
                                trainable: true,
                                dtype: _dtype);
            if (use_bias)
            {
                bias = add_weight(name: "bias",
                                  shape: new int[] { filters },
                                  initializer: bias_initializer,
                                  trainable: true,
                                  dtype: _dtype);
            }

            var axes = new Dictionary <int, int>();

            axes.Add(-1, input_dim);
            input_spec = new InputSpec(ndim: rank + 2, axes: axes);

            string op_padding;

            if (padding == "causal")
            {
                op_padding = "valid";
            }
            else
            {
                op_padding = padding;
            }

            var df = conv_utils.convert_data_format(data_format, rank + 2);

            _convolution_op = nn_ops.Convolution(input_shape,
                                                 kernel.shape,
                                                 op_padding.ToUpper(),
                                                 strides,
                                                 dilation_rate,
                                                 data_format: df);

            built = true;
        }
Example #19
 private void variable_summaries(RefVariable var)
 {
     tf_with(tf.name_scope("summaries"), delegate
     {
         var mean = tf.reduce_mean(var);
         tf.summary.scalar("mean", mean);
         Tensor stddev = null;
         tf_with(tf.name_scope("stddev"), delegate
         {
             stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)));
         });
         tf.summary.scalar("stddev", stddev);
         tf.summary.scalar("max", tf.reduce_max(var));
         tf.summary.scalar("min", tf.reduce_min(var));
         tf.summary.histogram("histogram", var);
     });
 }
Example #20
    void loadSession()
    {
        //TextAsset graphModel = Resources.Load("model.pb") as TextAsset;
        //graph = new Graph();
        //graph.Import(graphModel.bytes);
        //sess = new Session(graph);
        sess = new Session();

        // tf Graph Input
        X = tf.placeholder(tf.float32);
        Y = tf.placeholder(tf.float32);
        //W = tf.Variable(0.0317f, dtype: tf.float32, name: "weight");
        //b = tf.Variable( -0.125f, dtype: tf.float32, name: "bias");
        W = tf.Variable(0.3f, dtype: tf.float32, name: "weight");
        b = tf.Variable(-0.1f, dtype: tf.float32, name: "bias");
        // Construct a linear model
        pred = tf.add(tf.multiply(X, W), b);
    }
Example #21
        // Example of how to create a neuron layer from scratch; in practice, use tf.layers.dense instead
        public static Tensor NeuronLayer(Tensor X, int nNeurons, string name, IActivation activation = null)
        {
            using (tf.name_scope(name))
            {
                int         nInputs = X.shape[1];
                NDArray     stddev  = 2 / np.sqrt(nInputs);
                Tensor      init    = tf.truncated_normal(new[] { nInputs, nNeurons }, stddev: stddev);
                RefVariable W       = tf.Variable(init, name: "kernel");
                RefVariable b       = tf.Variable(tf.zeros(new[] { nNeurons }), name: "bias");
                Tensor      Z       = tf.matmul(X, W) + b;

                if (activation != null)
                {
                    return(activation.Activate(Z));
                }

                return(Z);
            }
        }
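A hedged usage sketch wiring NeuronLayer into a small two-layer network; the placeholder shape and layer sizes are illustrative:

 // Sketch: stacking two hand-rolled layers (tf.layers.dense is the real alternative).
 var X      = tf.placeholder(tf.float32, new TensorShape(-1, 784), name: "X");
 var hidden = NeuronLayer(X, 300, "hidden1", activation: tf.nn.relu());
 var logits = NeuronLayer(hidden, 10, "outputs"); // no activation on the output layer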
Example #22
        public override Operation _apply_dense(Tensor grad, RefVariable var)
        {
            var m = get_slot(var, "m");
            var v = get_slot(var, "v");

            var(beta1_power, beta2_power) = _get_beta_accumulators();
            return(gen_training_ops.apply_adam(
                       var,
                       m,
                       v,
                       math_ops.cast(beta1_power, var.dtype.as_base_dtype()),
                       math_ops.cast(beta2_power, var.dtype.as_base_dtype()),
                       math_ops.cast(_lr_t, var.dtype.as_base_dtype()),
                       math_ops.cast(_beta1_t, var.dtype.as_base_dtype()),
                       math_ops.cast(_beta2_t, var.dtype.as_base_dtype()),
                       math_ops.cast(_epsilon_t, var.dtype.as_base_dtype()),
                       grad,
                       use_locking: _use_locking).op);
        }
Example #23
        /// <summary>
        /// Create a slot initialized to 0 with same shape as the primary object.
        /// </summary>
        /// <param name="primary"></param>
        /// <param name="name"></param>
        /// <param name="dtype"></param>
        /// <param name="colocate_with_primary"></param>
        /// <returns></returns>
        public RefVariable create_zeros_slot(RefVariable primary, string name, TF_DataType dtype = TF_DataType.DtInvalid, bool colocate_with_primary = true)
        {
            if (dtype == TF_DataType.DtInvalid)
            {
                dtype = primary.dtype;
            }
            var slot_shape = primary.shape;

            if (slot_shape.is_fully_defined())
            {
                var initializer = new Zeros();
                return(create_slot_with_initializer(
                           primary, initializer, slot_shape, dtype, name,
                           colocate_with_primary: colocate_with_primary));
            }
            else
            {
                throw new NotImplementedException("create_zeros_slot is not fully defined.");
            }
        }
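A hedged sketch of the typical call site: an optimizer allocating zero-initialized accumulators, much like the `m` and `v` slots used by the Adam examples above:

 // Sketch: zero slots for Adam's first and second moment estimates.
 var m = create_zeros_slot(var, "m"); // dtype defaults to var.dtype
 var v = create_zeros_slot(var, "v");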
Example #24
        public static Tensor apply_adam(RefVariable var, RefVariable m, RefVariable v, Tensor beta1_power, Tensor beta2_power,
                                        Tensor lr, Tensor beta1, Tensor beta2, Tensor epsilon, Tensor grad,
                                        bool use_locking = false, bool use_nesterov = false, string name = null)
        {
            var _op = _op_def_lib._apply_op_helper("ApplyAdam", name, new
            {
                var,
                m,
                v,
                beta1_power,
                beta2_power,
                lr,
                beta1,
                beta2,
                epsilon,
                grad,
                use_locking,
                use_nesterov
            });

            return(_op.outputs[0]);
        }
Example #25
        public static RefVariable get_global_step(Graph graph = null)
        {
            graph = graph ?? ops.get_default_graph();
            RefVariable global_step_tensor  = null;
            var         global_step_tensors = graph.get_collection <RefVariable>(tf.GraphKeys.GLOBAL_STEP);

            if (global_step_tensors.Count == 1)
            {
                global_step_tensor = global_step_tensors[0];
            }
            else
            {
                try
                {
                    global_step_tensor = graph.get_tensor_by_name("global_step:0");
                }
                catch (KeyError)
                {
                    return(null);
                }
            }

            return(global_step_tensor);
        }
Example #26
 public Tensor __call__(Tensor inp, RefVariable filter)
 {
     return(call.__call__(inp, filter));
 }
Example #27
        public VdCnn(int alphabet_size, int document_max_len, int num_class)
        {
            embedding_size  = 16;
            filter_sizes    = new int[] { 3, 3, 3, 3, 3 };
            num_filters     = new int[] { 64, 64, 128, 256, 512 };
            num_blocks      = new int[] { 2, 2, 2, 2 };
            learning_rate   = 0.001f;
            cnn_initializer = tf.keras.initializers.he_normal();
            fc_initializer  = tf.truncated_normal_initializer(stddev: 0.05f);

            x           = tf.placeholder(tf.int32, new TensorShape(-1, document_max_len), name: "x");
            y           = tf.placeholder(tf.int32, new TensorShape(-1), name: "y");
            is_training = tf.placeholder(tf.@bool, new TensorShape(), name: "is_training");
            global_step = tf.Variable(0, trainable: false);

            // Embedding Layer
            tf_with(tf.name_scope("embedding"), delegate
            {
                var init_embeddings = tf.random_uniform(new int[] { alphabet_size, embedding_size }, -1.0f, 1.0f);
                embeddings          = tf.get_variable("embeddings", initializer: init_embeddings);
                x_emb      = tf.nn.embedding_lookup(embeddings, x);
                x_expanded = tf.expand_dims(x_emb, -1);
            });

            Tensor conv0   = null;
            Tensor conv1   = null;
            Tensor conv2   = null;
            Tensor conv3   = null;
            Tensor conv4   = null;
            Tensor h_flat  = null;
            Tensor fc1_out = null;
            Tensor fc2_out = null;

            // First Convolution Layer
            tf_with(tf.variable_scope("conv-0"), delegate
            {
                conv0 = tf.layers.conv2d(x_expanded,
                                         filters: num_filters[0],
                                         kernel_size: new int[] { filter_sizes[0], embedding_size },
                                         kernel_initializer: cnn_initializer,
                                         activation: tf.nn.relu());

                conv0 = tf.transpose(conv0, new int[] { 0, 1, 3, 2 });
            });

            tf_with(tf.name_scope("conv-block-1"), delegate {
                conv1 = conv_block(conv0, 1);
            });

            tf_with(tf.name_scope("conv-block-2"), delegate {
                conv2 = conv_block(conv1, 2);
            });

            tf_with(tf.name_scope("conv-block-3"), delegate {
                conv3 = conv_block(conv2, 3);
            });

            tf_with(tf.name_scope("conv-block-4"), delegate
            {
                conv4 = conv_block(conv3, 4, max_pool: false);
            });

            // ============= k-max Pooling =============
            tf_with(tf.name_scope("k-max-pooling"), delegate
            {
                var h     = tf.transpose(tf.squeeze(conv4, new int[] { -1 }), new int[] { 0, 2, 1 });
                var top_k = tf.nn.top_k(h, k: 8, sorted: false)[0];
                h_flat    = tf.reshape(top_k, new int[] { -1, 512 * 8 });
            });

            // ============= Fully Connected Layers =============
            tf_with(tf.name_scope("fc-1"), scope =>
            {
                fc1_out = tf.layers.dense(h_flat, 2048, activation: tf.nn.relu(), kernel_initializer: fc_initializer);
            });

            tf_with(tf.name_scope("fc-2"), scope =>
            {
                fc2_out = tf.layers.dense(fc1_out, 2048, activation: tf.nn.relu(), kernel_initializer: fc_initializer);
            });

            tf_with(tf.name_scope("fc-3"), scope =>
            {
                logits      = tf.layers.dense(fc2_out, num_class, activation: null, kernel_initializer: fc_initializer);
                predictions = tf.argmax(logits, -1, output_type: tf.int32);
            });

            // ============= Loss and Accuracy =============
            tf_with(tf.name_scope("loss"), delegate
            {
                var y_one_hot = tf.one_hot(y, num_class);
                loss          = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits: logits, labels: y_one_hot));

                var update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) as List <object>;
                tf_with(tf.control_dependencies(update_ops.Select(x => (Operation)x).ToArray()), delegate
                {
                    var adam = tf.train.AdamOptimizer(learning_rate);
                    adam.minimize(loss, global_step: global_step);
                });
            });
        }
Example #28
        protected override void build(TensorShape input_shape)
        {
            var ndims = input_shape.ndim;

            foreach (var(idx, x) in enumerate(axis))
            {
                if (x < 0)
                {
                    axis[idx] = ndims + x;
                }
            }

            if (fused)
            {
                if (Enumerable.SequenceEqual(axis, new int[] { 3 }))
                {
                    _data_format = "NHWC";
                }
            }

            var param_dtype = _dtype == TF_DataType.DtInvalid ? TF_DataType.TF_FLOAT : _dtype;
            var param_shape = new int[] { input_shape.dims[axis[0]] };

            if (scale)
            {
                gamma = add_weight("gamma",
                                   param_shape,
                                   dtype: param_dtype,
                                   initializer: gamma_initializer,
                                   trainable: true);
            }
            else
            {
                throw new NotImplementedException("add_weight gamma");
            }

            if (center)
            {
                beta = add_weight("beta",
                                  param_shape,
                                  dtype: param_dtype,
                                  initializer: beta_initializer,
                                  trainable: true);
            }
            else
            {
                throw new NotImplementedException("add_weight beta");
            }

            if (_scope != null)
            {
            }

            moving_mean = (RefVariable)add_weight("moving_mean",
                                                  param_shape,
                                                  dtype: param_dtype,
                                                  initializer: moving_mean_initializer,
                                                  synchronization: VariableSynchronization.OnRead,
                                                  trainable: false,
                                                  aggregation: VariableAggregation.Mean);

            moving_variance = (RefVariable)add_weight("moving_variance",
                                                      shape: param_shape,
                                                      dtype: param_dtype,
                                                      initializer: moving_variance_initializer,
                                                      synchronization: VariableSynchronization.OnRead,
                                                      trainable: false,
                                                      aggregation: VariableAggregation.Mean);

            if (renorm)
            {
                throw new NotImplementedException("build when renorm is true");
            }

            built = true;
        }
Example #29
        /// <summary>
        /// Adds a new softmax and fully-connected layer for training and eval.
        ///
        /// We need to retrain the top layer to identify our new classes, so this function
        /// adds the right operations to the graph, along with some variables to hold the
        /// weights, and then sets up all the gradients for the backward pass.
        ///
        /// The set up for the softmax and fully-connected layers is based on:
        /// https://www.tensorflow.org/tutorials/mnist/beginners/index.html
        /// </summary>
        /// <param name="class_count"></param>
        /// <param name="final_tensor_name"></param>
        /// <param name="bottleneck_tensor"></param>
        /// <param name="quantize_layer"></param>
        /// <param name="is_training"></param>
        /// <returns></returns>
        private (Operation, Tensor, Tensor, Tensor, Tensor) add_final_retrain_ops(int class_count, string final_tensor_name,
                                                                                  Tensor bottleneck_tensor, bool quantize_layer, bool is_training)
        {
            var(batch_size, bottleneck_tensor_size) = (bottleneck_tensor.TensorShape.dims[0], bottleneck_tensor.TensorShape.dims[1]);
            tf_with(tf.name_scope("input"), scope =>
            {
                bottleneck_input = tf.placeholder_with_default(
                    bottleneck_tensor,
                    shape: bottleneck_tensor.TensorShape.dims,
                    name: "BottleneckInputPlaceholder");

                ground_truth_input = tf.placeholder(tf.int64, new TensorShape(batch_size), name: "GroundTruthInput");
            });

            // Organizing the following ops so they are easier to see in TensorBoard.
            string layer_name = "final_retrain_ops";
            Tensor logits     = null;

            tf_with(tf.name_scope(layer_name), scope =>
            {
                RefVariable layer_weights = null;
                tf_with(tf.name_scope("weights"), delegate
                {
                    var initial_value = tf.truncated_normal(new int[] { bottleneck_tensor_size, class_count }, stddev: 0.001f);
                    layer_weights     = tf.Variable(initial_value, name: "final_weights");
                    variable_summaries(layer_weights);
                });

                RefVariable layer_biases = null;
                tf_with(tf.name_scope("biases"), delegate
                {
                    layer_biases = tf.Variable(tf.zeros(new TensorShape(class_count)), name: "final_biases");
                    variable_summaries(layer_biases);
                });

                tf_with(tf.name_scope("Wx_plus_b"), delegate
                {
                    logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases;
                    tf.summary.histogram("pre_activations", logits);
                });
            });

            final_tensor = tf.nn.softmax(logits, name: final_tensor_name);

            // The tf.contrib.quantize functions rewrite the graph in place for
            // quantization. The imported model graph has already been rewritten, so upon
            // calling these rewrites, only the newly added final layer will be
            // transformed.
            if (quantize_layer)
            {
                throw new NotImplementedException("quantize_layer");

                /*if (is_training)
                 *  tf.contrib.quantize.create_training_graph();
                 * else
                 *  tf.contrib.quantize.create_eval_graph();*/
            }

            tf.summary.histogram("activations", final_tensor);

            // If this is an eval graph, we don't need to add loss ops or an optimizer.
            if (!is_training)
            {
                return(null, null, bottleneck_input, ground_truth_input, final_tensor);
            }

            Tensor cross_entropy_mean = null;

            tf_with(tf.name_scope("cross_entropy"), delegate
            {
                cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
                    labels: ground_truth_input, logits: logits);
            });

            tf.summary.scalar("cross_entropy", cross_entropy_mean);

            tf_with(tf.name_scope("train"), delegate
            {
                var optimizer = tf.train.GradientDescentOptimizer(learning_rate);
                train_step    = optimizer.minimize(cross_entropy_mean);
            });

            return(train_step, cross_entropy_mean, bottleneck_input, ground_truth_input,
                   final_tensor);
        }
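A hedged sketch of how the returned tuple might be driven from a training loop; the session setup and feed values (`bottleneck`, `bottlenecks`, `labels`) are hypothetical, not part of the original:

 // Sketch: one training step against the ops built above.
 var (train_step, cross_entropy, bottleneck_input, ground_truth_input, final_tensor) =
     add_final_retrain_ops(class_count: 5, final_tensor_name: "final_result",
                           bottleneck_tensor: bottleneck, quantize_layer: false,
                           is_training: true);
 using (var sess = tf.Session())
 {
     sess.run(tf.global_variables_initializer());
     sess.run(train_step, (bottleneck_input, bottlenecks), (ground_truth_input, labels));
 }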
Example #30
        public Graph BuildGraph()
        {
            var graph = new Graph().as_default();

            tf_with(tf.name_scope("define_input"), scope =>
            {
                input_data   = tf.placeholder(dtype: tf.float32, name: "input_data");
                label_sbbox  = tf.placeholder(dtype: tf.float32, name: "label_sbbox");
                label_mbbox  = tf.placeholder(dtype: tf.float32, name: "label_mbbox");
                label_lbbox  = tf.placeholder(dtype: tf.float32, name: "label_lbbox");
                true_sbboxes = tf.placeholder(dtype: tf.float32, name: "sbboxes");
                true_mbboxes = tf.placeholder(dtype: tf.float32, name: "mbboxes");
                true_lbboxes = tf.placeholder(dtype: tf.float32, name: "lbboxes");
                trainable    = tf.placeholder(dtype: tf.@bool, name: "training");
            });

            tf_with(tf.name_scope("define_loss"), scope =>
            {
                model   = new YOLOv3(cfg, input_data, trainable);
                net_var = tf.global_variables();
                (giou_loss, conf_loss, prob_loss) = model.compute_loss(
                    label_sbbox, label_mbbox, label_lbbox,
                    true_sbboxes, true_mbboxes, true_lbboxes);
                loss = giou_loss + conf_loss + prob_loss;
            });

            Tensor global_step_update = null;

            tf_with(tf.name_scope("learn_rate"), scope =>
            {
                global_step      = tf.Variable(1.0, dtype: tf.float64, trainable: false, name: "global_step");
                var warmup_steps = tf.constant(warmup_periods * steps_per_period,
                                               dtype: tf.float64, name: "warmup_steps");
                var train_steps = tf.constant((first_stage_epochs + second_stage_epochs) * steps_per_period,
                                              dtype: tf.float64, name: "train_steps");

                learn_rate = tf.cond(
                    pred: global_step < warmup_steps,
                    true_fn: delegate
                {
                    return(global_step / warmup_steps * learn_rate_init);
                },
                    false_fn: delegate
                {
                    return(learn_rate_end + 0.5 * (learn_rate_init - learn_rate_end) *
                           (1 + tf.cos(
                                (global_step - warmup_steps) / (train_steps - warmup_steps) * Math.PI)));
                }
                    );

                global_step_update = tf.assign_add(global_step, 1.0f);
            });

            Operation moving_ave = null;

            tf_with(tf.name_scope("define_weight_decay"), scope =>
            {
                var emv    = tf.train.ExponentialMovingAverage(moving_ave_decay);
                var vars   = tf.trainable_variables().Select(x => (RefVariable)x).ToArray();
                moving_ave = emv.apply(vars);
            });

            tf_with(tf.name_scope("define_first_stage_train"), scope =>
            {
            });

            return(graph);
        }