protected override void build(TensorShape input_shape)
{
    // Lazily create the [input_dim, output_dim] embedding matrix on first build.
    embeddings = add_weight(shape: new int[] { input_dim, output_dim },
        initializer: embeddings_initializer,
        name: "embeddings");
    built = true;
}
public override Operation _apply_sparse(IndexedSlices grad, RefVariable var)
{
    return _apply_sparse_shared(grad.values, var, grad.indices,
        (x, i, v) => state_ops.scatter_add(x, i, v, use_locking: _use_locking));
}
public _InitializeClustersOpFactory(Tensor[] inputs,
    Tensor num_clusters,
    string initial_clusters,
    string distance_metric,
    int random_seed,
    int kmeans_plus_plus_num_retries,
    int kmc2_chain_length,
    RefVariable cluster_centers,
    RefVariable cluster_centers_updated,
    RefVariable cluster_centers_initialized)
{
    _inputs = inputs;
    _num_clusters = num_clusters;
    _initial_clusters = initial_clusters;
    _distance_metric = distance_metric;
    _random_seed = random_seed;
    _kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries;
    _kmc2_chain_length = kmc2_chain_length;
    _cluster_centers = cluster_centers;
    _cluster_centers_updated = cluster_centers_updated;
    _cluster_centers_initialized = cluster_centers_initialized;

    // Derived counts: centers already selected, centers still to pick,
    // and the total number of input data points across all input tensors.
    _num_selected = array_ops.shape(_cluster_centers).slice(0);
    _num_remaining = _num_clusters - _num_selected;
    _num_data = math_ops.add_n(_inputs.Select(i => array_ops.shape(i).slice(0)).ToArray());
}
protected override void build(TensorShape input_shape)
{
    var last_dim = input_shape.dims.Last();
    var axes = new Dictionary<int, int>();
    axes[-1] = last_dim;
    input_spec = new InputSpec(min_ndim: 2, axes: axes);

    kernel = add_weight("kernel",
        shape: new int[] { last_dim, units },
        initializer: kernel_initializer,
        dtype: _dtype,
        trainable: true);

    if (use_bias)
        bias = add_weight("bias",
            shape: new int[] { units },
            initializer: bias_initializer,
            dtype: _dtype,
            trainable: true);

    built = true;
}
public VdCnn(int alphabet_size, int document_max_len, int num_class)
{
    embedding_size = 16;
    filter_sizes = new int[] { 3, 3, 3, 3, 3 };
    num_filters = new int[] { 64, 64, 128, 256, 512 };
    num_blocks = new int[] { 2, 2, 2, 2 };
    learning_rate = 0.001f;
    cnn_initializer = tf.keras.initializers.he_normal();

    x = tf.placeholder(tf.int32, new TensorShape(-1, document_max_len), name: "x");
    y = tf.placeholder(tf.int32, new TensorShape(-1), name: "y");
    is_training = tf.placeholder(tf.@bool, new TensorShape(), name: "is_training");
    global_step = tf.Variable(0, trainable: false);

    // Embedding Layer
    tf_with(tf.name_scope("embedding"), delegate
    {
        var init_embeddings = tf.random_uniform(new int[] { alphabet_size, embedding_size }, -1.0f, 1.0f);
        embeddings = tf.get_variable("embeddings", initializer: init_embeddings);
        x_emb = tf.nn.embedding_lookup(embeddings, x);
        x_expanded = tf.expand_dims(x_emb, -1);
    });

    // First Convolution Layer
    tf_with(tf.variable_scope("conv-0"), delegate
    {
        var conv0 = tf.layers.conv2d(x_expanded,
            filters: num_filters[0],
            kernel_size: new int[] { filter_sizes[0], embedding_size },
            kernel_initializer: cnn_initializer,
            activation: tf.nn.relu());
    });
}
private RefVariable[] _create_variables(Tensor num_clusters)
{
    var init_value = constant_op.constant(new float[0], dtype: TF_DataType.TF_FLOAT);
    var cluster_centers = tf.Variable(init_value, name: CLUSTERS_VAR_NAME, validate_shape: false);
    var cluster_centers_initialized = tf.Variable(false, dtype: TF_DataType.TF_BOOL, name: "initialized");
    RefVariable update_in_steps = null;

    if (_use_mini_batch && _mini_batch_steps_per_iteration > 1)
    {
        throw new NotImplementedException("KMeans._create_variables");
    }
    else
    {
        var cluster_centers_updated = cluster_centers;
        var ones = array_ops.ones(new Tensor[] { num_clusters }, dtype: TF_DataType.TF_INT64);
        var cluster_counts = _use_mini_batch ? tf.Variable(ones) : null;
        return new RefVariable[]
        {
            cluster_centers,
            cluster_centers_initialized,
            cluster_counts,
            cluster_centers_updated,
            update_in_steps
        };
    }
}
private Operation _apply_sparse_shared(Tensor grad, RefVariable var, Tensor indices,
    Func<RefVariable, Tensor, Tensor, Tensor> scatter_add)
{
    var (beta1_power_v, beta2_power_v) = _get_beta_accumulators();
    Tensor beta1_power = math_ops.cast(beta1_power_v, var.dtype.as_base_dtype());
    Tensor beta2_power = math_ops.cast(beta2_power_v, var.dtype.as_base_dtype());
    var lr_t = math_ops.cast(_lr_t, var.dtype.as_base_dtype());
    var beta1_t = math_ops.cast(_beta1_t, var.dtype.as_base_dtype());
    var beta2_t = math_ops.cast(_beta2_t, var.dtype.as_base_dtype());
    var epsilon_t = math_ops.cast(_epsilon_t, var.dtype.as_base_dtype());
    var lr = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power);

    // First moment: m_t = beta1 * m + (1 - beta1) * g, applied sparsely via scatter_add.
    var m = get_slot(var, "m");
    var m_scaled_g_values = grad * (1 - beta1_t);
    var m_t = state_ops.assign(m, m * beta1_t, use_locking: _use_locking);
    tf_with(ops.control_dependencies(new[] { m_t }), delegate
    {
        m_t = scatter_add(m, indices, m_scaled_g_values);
    });

    // Second moment: v_t = beta2 * v + (1 - beta2) * g^2, applied sparsely via scatter_add.
    var v = get_slot(var, "v");
    var v_scaled_g_values = (grad * grad) * (1 - beta2_t);
    var v_t = state_ops.assign(v, v * beta2_t, use_locking: _use_locking);
    tf_with(ops.control_dependencies(new[] { v_t }), delegate
    {
        v_t = scatter_add(v, indices, v_scaled_g_values);
    });

    var v_sqrt = math_ops.sqrt(v_t);
    var var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking: _use_locking);
    return control_flow_ops.group(new[] { var_update, m_t, v_t });
}
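// For reference, the ops above implement the standard Adam update, restricted to
// the rows selected by `indices`:
//
//   m_t = beta1 * m + (1 - beta1) * g
//   v_t = beta2 * v + (1 - beta2) * g^2
//   lr_hat = lr * sqrt(1 - beta2_power) / (1 - beta1_power)
//   var -= lr_hat * m_t / (sqrt(v_t) + epsilon)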
public Tensor __call__(RefVariable step)
{
    return tf_with(ops.name_scope(name ?? "PolynomialDecay"), scope =>
    {
        name = scope;
        var initial_learning_rate_tensor = ops.convert_to_tensor(initial_learning_rate, name: "initial_learning_rate");
        var dtype = initial_learning_rate_tensor.dtype;
        var end_learning_rate_tensor = math_ops.cast(end_learning_rate, dtype);
        var power_tensor = math_ops.cast(power, dtype);
        var global_step_recomp = math_ops.cast(step, dtype);
        var decay_steps_recomp = math_ops.cast(decay_steps, dtype);

        if (cycle)
        {
            throw new NotImplementedException("PolynomialDecay cycle");
        }
        else
        {
            // Make sure that the global_step used is not bigger than decay_steps
            // (compare against the dtype-cast tensor, not the raw field).
            global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps_recomp);
        }

        // decayed_lr = (initial - end) * (1 - step / decay_steps)^power + end
        var p = tf.divide(global_step_recomp, decay_steps_recomp);
        var pow = tf.pow(1 - p, power_tensor);
        var m = math_ops.multiply(initial_learning_rate_tensor - end_learning_rate_tensor, pow);
        return math_ops.add(m, end_learning_rate_tensor, name: name);
    });
}
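// A minimal usage sketch for the schedule above. The constructor arguments named
// here (initial_learning_rate, decay_steps, end_learning_rate, power) are assumed
// from the fields the method reads; they are not confirmed by this listing.
public static void PolynomialDecaySketch()
{
    var global_step = tf.Variable(0, trainable: false);
    var schedule = new PolynomialDecay(initial_learning_rate: 0.1f,
        decay_steps: 10000,
        end_learning_rate: 0.001f,
        power: 0.5f);
    // Tensor that shrinks from 0.1 toward 0.001 as global_step approaches 10000.
    var lr = schedule.__call__(global_step);
}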
public Tensor _assign_moving_average(RefVariable variable, Tensor value, Tensor momentum)
{
    return tf_with(ops.name_scope(null, "AssignMovingAvg", new { variable, value, momentum }), scope =>
    {
        // var cm = ops.colocate_with(variable);
        var decay = ops.convert_to_tensor(1.0f - momentum, name: "decay");
        var update_delta = (variable - math_ops.cast(value, variable.dtype)) * decay;
        return state_ops.assign_sub(variable, update_delta, name: scope);
    });
}
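// In formula form, the update above is the standard exponential moving average:
//
//   variable -= (1 - momentum) * (variable - value)
//   i.e. variable = momentum * variable + (1 - momentum) * value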
/// <summary>
/// Create a slot initialized to the given value.
/// </summary>
/// <param name="primary"></param>
/// <param name="val"></param>
/// <param name="name"></param>
/// <param name="colocate_with_primary"></param>
/// <returns></returns>
public RefVariable create_slot(RefVariable primary, Tensor val, string name, bool colocate_with_primary = true)
{
    var validate_shape = val.TensorShape.is_fully_defined();
    var prefix = primary.Op.name;
    return tf_with(tf.variable_scope(name: null, prefix + "/" + name), delegate
    {
        return _create_slot_var(primary, val, "", validate_shape, null, TF_DataType.DtInvalid);
    });
}
/// <summary>
/// Creates a slot initialized using an `Initializer`.
/// </summary>
/// <returns></returns>
public RefVariable create_slot_with_initializer(RefVariable primary, IInitializer initializer, TensorShape shape,
    TF_DataType dtype, string name, bool colocate_with_primary = true)
{
    var validate_shape = shape.is_fully_defined();
    var prefix = primary.Op.name;
    return tf_with(new variable_scope(string.Empty, prefix + "/" + name), delegate
    {
        return _create_slot_var(primary, initializer, "", validate_shape, shape, dtype);
    });
}
public static Tensor Relu(Tensor x)
{
    using (tf.name_scope("relu"))
    {
        int[] w_shape = { x.shape[1], 1 };
        RefVariable w = tf.Variable(tf.random_normal(w_shape), name: "weights");
        RefVariable b = tf.Variable(0.0f, name: "bias");
        Tensor z = tf.add(tf.matmul(x, w), b);
        return tf.maximum(z, 0, "relu");
    }
}
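// Minimal usage sketch for Relu above; the input shape is an illustrative assumption.
public static Tensor ReluSketch()
{
    var x = tf.placeholder(tf.float32, new TensorShape(-1, 3), name: "x");
    return Relu(x); // max(x*w + b, 0), with w and b freshly initialized
}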
public Tensor __call__(Tensor inp, RefVariable filter)
{
    return conv_op(new
    {
        input = inp,
        filter,
        strides,
        padding,
        data_format,
        name
    });
}
public Tensor __call__(Tensor inp, RefVariable filter)
{
    return conv_op(new Conv2dParams
    {
        Input = inp,
        Filter = filter,
        Strides = strides,
        Padding = padding,
        DataFormat = data_format,
        Name = name
    });
}
public static Tensor apply_gradient_descent(RefVariable var, Tensor alpha, Tensor delta,
    bool use_locking = false, string name = null)
{
    var _op = _op_def_lib._apply_op_helper("ApplyGradientDescent", name,
        new { var, alpha, delta, use_locking });
    return _op.outputs[0];
}
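// For reference, the generated op performs the plain gradient-descent step in place:
//
//   var -= alpha * delta
//
// where alpha is the learning rate and delta the gradient.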
public ILayer __build__(TensorShape input_shape, int seed = 1, float stddev = -1f)
{
    Console.WriteLine("Building Layer \"" + name + "\" ...");
    // Default to stddev = 1/sqrt(2) when none is supplied.
    if (stddev == -1f)
        stddev = (float)(1 / Math.Sqrt(2));

    var dim = input_shape.dims;
    var input_dim = dim[dim.Length - 1];
    W = tf.Variable(create_tensor(new int[] { input_dim, units }, seed: seed, stddev: stddev));
    WShape = new TensorShape(W.shape);
    return this;
}
/// <summary>
/// Compute the moving average of a variable.
/// </summary>
/// <param name="variable"></param>
/// <param name="value"></param>
/// <param name="decay"></param>
/// <param name="zero_debias"></param>
/// <param name="name"></param>
/// <returns></returns>
public static Tensor assign_moving_average(RefVariable variable, RefVariable value, Tensor decay,
    bool zero_debias = true, string name = null)
{
    return tf_with(ops.name_scope(name, "AssignMovingAvg", new { variable, value, decay }), scope =>
    {
        // Re-express the update as: variable -= (variable - value) * (1 - decay).
        decay = ops.convert_to_tensor(1.0f - decay, name: "decay");
        if (decay.dtype != variable.dtype.as_base_dtype())
            decay = math_ops.cast(decay, variable.dtype.as_base_dtype());
        return state_ops.assign_sub(variable, (variable - value) * decay, name: scope);
    });
}
protected override void build(TensorShape input_shape)
{
    int channel_axis = data_format == "channels_first" ? 1 : -1;
    int input_dim = channel_axis < 0 ?
        input_shape.dims[input_shape.ndim + channel_axis] :
        input_shape.dims[channel_axis];
    var kernel_shape = new int[] { kernel_size[0], kernel_size[1], input_dim, filters };

    kernel = add_weight(name: "kernel",
        shape: kernel_shape,
        initializer: kernel_initializer,
        trainable: true,
        dtype: _dtype);

    if (use_bias)
        bias = add_weight(name: "bias",
            shape: new int[] { filters },
            initializer: bias_initializer,
            trainable: true,
            dtype: _dtype);

    var axes = new Dictionary<int, int>();
    axes.Add(-1, input_dim);
    input_spec = new InputSpec(ndim: rank + 2, axes: axes);

    // "causal" is not a primitive op padding mode; the underlying op runs "valid".
    string op_padding = padding == "causal" ? "valid" : padding;
    var df = conv_utils.convert_data_format(data_format, rank + 2);
    _convolution_op = nn_ops.Convolution(input_shape,
        kernel.shape,
        op_padding.ToUpper(),
        strides,
        dilation_rate,
        data_format: df);

    built = true;
}
private void variable_summaries(RefVariable var)
{
    tf_with(tf.name_scope("summaries"), delegate
    {
        var mean = tf.reduce_mean(var);
        tf.summary.scalar("mean", mean);
        Tensor stddev = null;
        tf_with(tf.name_scope("stddev"), delegate
        {
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)));
        });
        tf.summary.scalar("stddev", stddev);
        tf.summary.scalar("max", tf.reduce_max(var));
        tf.summary.scalar("min", tf.reduce_min(var));
        tf.summary.histogram("histogram", var);
    });
}
void loadSession()
{
    //TextAsset graphModel = Resources.Load("model.pb") as TextAsset;
    //graph = new Graph();
    //graph.Import(graphModel.bytes);
    //sess = new Session(graph);
    sess = new Session();

    // tf Graph Input
    X = tf.placeholder(tf.float32);
    Y = tf.placeholder(tf.float32);

    //W = tf.Variable(0.0317f, dtype: tf.float32, name: "weight");
    //b = tf.Variable(-0.125f, dtype: tf.float32, name: "bias");
    W = tf.Variable(0.3f, dtype: tf.float32, name: "weight");
    b = tf.Variable(-0.1f, dtype: tf.float32, name: "bias");

    // Construct a linear model
    pred = tf.add(tf.multiply(X, W), b);
}
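// Hedged follow-up sketch: once loadSession() has run, the linear model can be
// evaluated by feeding X. FeedItem is the usual TensorFlow.NET feed mechanism;
// the input value is illustrative.
void PredictSketch(float xValue)
{
    var result = sess.run(pred, new FeedItem(X, xValue));
    Console.WriteLine(result); // approximately 0.3 * xValue - 0.1
}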
// Example of how to create a neuron layer from scratch; use tf.layers.dense instead.
public static Tensor NeuronLayer(Tensor X, int nNeurons, string name, IActivation activation = null)
{
    using (tf.name_scope(name))
    {
        int nInputs = X.shape[1];
        // Scale the initial weights by 2 / sqrt(nInputs) to keep early activations well-behaved.
        NDArray stddev = 2 / np.sqrt(nInputs);
        Tensor init = tf.truncated_normal(new[] { nInputs, nNeurons }, stddev: stddev);
        RefVariable W = tf.Variable(init, name: "kernel");
        RefVariable b = tf.Variable(tf.zeros(new[] { nNeurons }), name: "bias");
        Tensor Z = tf.matmul(X, W) + b;
        if (activation != null)
            return activation.Activate(Z);
        return Z;
    }
}
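// Sketch of stacking NeuronLayer into a small two-layer network; the layer sizes
// and the IActivation instance are illustrative assumptions.
public static Tensor TwoLayerSketch(Tensor X, IActivation relu)
{
    var hidden = NeuronLayer(X, 300, "hidden1", activation: relu);
    return NeuronLayer(hidden, 10, "outputs"); // linear logits layer
}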
public override Operation _apply_dense(Tensor grad, RefVariable var)
{
    var m = get_slot(var, "m");
    var v = get_slot(var, "v");
    var (beta1_power, beta2_power) = _get_beta_accumulators();
    return gen_training_ops.apply_adam(
        var, m, v,
        math_ops.cast(beta1_power, var.dtype.as_base_dtype()),
        math_ops.cast(beta2_power, var.dtype.as_base_dtype()),
        math_ops.cast(_lr_t, var.dtype.as_base_dtype()),
        math_ops.cast(_beta1_t, var.dtype.as_base_dtype()),
        math_ops.cast(_beta2_t, var.dtype.as_base_dtype()),
        math_ops.cast(_epsilon_t, var.dtype.as_base_dtype()),
        grad,
        use_locking: _use_locking).op;
}
/// <summary>
/// Create a slot initialized to 0 with same shape as the primary object.
/// </summary>
/// <param name="primary"></param>
/// <param name="name"></param>
/// <param name="dtype"></param>
/// <param name="colocate_with_primary"></param>
/// <returns></returns>
public RefVariable create_zeros_slot(RefVariable primary, string name,
    TF_DataType dtype = TF_DataType.DtInvalid, bool colocate_with_primary = true)
{
    if (dtype == TF_DataType.DtInvalid)
        dtype = primary.dtype;
    var slot_shape = primary.shape;
    if (slot_shape.is_fully_defined())
    {
        var initializer = new Zeros();
        return create_slot_with_initializer(
            primary, initializer, slot_shape, dtype, name,
            colocate_with_primary: colocate_with_primary);
    }
    else
    {
        throw new NotImplementedException("create_zeros_slot: slot_shape is not fully defined.");
    }
}
public static Tensor apply_adam(RefVariable var, RefVariable m, RefVariable v,
    Tensor beta1_power, Tensor beta2_power,
    Tensor lr, Tensor beta1, Tensor beta2, Tensor epsilon, Tensor grad,
    bool use_locking = false, bool use_nesterov = false, string name = null)
{
    var _op = _op_def_lib._apply_op_helper("ApplyAdam", name, new
    {
        var, m, v,
        beta1_power, beta2_power,
        lr, beta1, beta2, epsilon, grad,
        use_locking, use_nesterov
    });
    return _op.outputs[0];
}
public static RefVariable get_global_step(Graph graph = null)
{
    graph = graph ?? ops.get_default_graph();
    RefVariable global_step_tensor = null;
    var global_step_tensors = graph.get_collection<RefVariable>(tf.GraphKeys.GLOBAL_STEP);
    if (global_step_tensors.Count == 1)
    {
        global_step_tensor = global_step_tensors[0];
    }
    else
    {
        // Fall back to resolving the variable by its well-known name.
        try
        {
            global_step_tensor = graph.get_tensor_by_name("global_step:0");
        }
        catch (KeyError)
        {
            return null;
        }
    }
    return global_step_tensor;
}
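// Usage sketch: resolve the step counter from the default graph before logging;
// the method returns null when no global step has been created.
public static void GlobalStepSketch()
{
    var step = get_global_step();
    if (step == null)
        Console.WriteLine("no global step in this graph");
}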
public Tensor __call__(Tensor inp, RefVariable filter)
{
    return call.__call__(inp, filter);
}
public VdCnn(int alphabet_size, int document_max_len, int num_class)
{
    embedding_size = 16;
    filter_sizes = new int[] { 3, 3, 3, 3, 3 };
    num_filters = new int[] { 64, 64, 128, 256, 512 };
    num_blocks = new int[] { 2, 2, 2, 2 };
    learning_rate = 0.001f;
    cnn_initializer = tensorflow.keras.initializers.he_normal();
    fc_initializer = tf.truncated_normal_initializer(stddev: 0.05f);

    x = tf.placeholder(tf.int32, new TensorShape(-1, document_max_len), name: "x");
    y = tf.placeholder(tf.int32, new TensorShape(-1), name: "y");
    is_training = tf.placeholder(tf.@bool, new TensorShape(), name: "is_training");
    global_step = tf.Variable(0, trainable: false);

    // Embedding Layer
    tf_with(tf.name_scope("embedding"), delegate
    {
        var init_embeddings = tf.random_uniform(new int[] { alphabet_size, embedding_size }, -1.0f, 1.0f);
        embeddings = tf.get_variable("embeddings", initializer: init_embeddings);
        x_emb = tf.nn.embedding_lookup(embeddings, x);
        x_expanded = tf.expand_dims(x_emb, -1);
    });

    Tensor conv0 = null;
    Tensor conv1 = null;
    Tensor conv2 = null;
    Tensor conv3 = null;
    Tensor conv4 = null;
    Tensor h_flat = null;
    Tensor fc1_out = null;
    Tensor fc2_out = null;

    // First Convolution Layer
    tf_with(tf.variable_scope("conv-0"), delegate
    {
        conv0 = tf.layers.conv2d(x_expanded,
            filters: num_filters[0],
            kernel_size: new int[] { filter_sizes[0], embedding_size },
            kernel_initializer: cnn_initializer,
            activation: tf.nn.relu());
        conv0 = tf.transpose(conv0, new int[] { 0, 1, 3, 2 });
    });

    tf_with(tf.name_scope("conv-block-1"), delegate { conv1 = conv_block(conv0, 1); });
    tf_with(tf.name_scope("conv-block-2"), delegate { conv2 = conv_block(conv1, 2); });
    tf_with(tf.name_scope("conv-block-3"), delegate { conv3 = conv_block(conv2, 3); });
    tf_with(tf.name_scope("conv-block-4"), delegate { conv4 = conv_block(conv3, 4, max_pool: false); });

    // ============= k-max Pooling =============
    tf_with(tf.name_scope("k-max-pooling"), delegate
    {
        var h = tf.transpose(tf.squeeze(conv4, new int[] { -1 }), new int[] { 0, 2, 1 });
        var top_k = tf.nn.top_k(h, k: 8, sorted: false)[0];
        h_flat = tf.reshape(top_k, new int[] { -1, 512 * 8 });
    });

    // ============= Fully Connected Layers =============
    tf_with(tf.name_scope("fc-1"), scope =>
    {
        fc1_out = tf.layers.dense(h_flat, 2048, activation: tf.nn.relu(), kernel_initializer: fc_initializer);
    });
    tf_with(tf.name_scope("fc-2"), scope =>
    {
        fc2_out = tf.layers.dense(fc1_out, 2048, activation: tf.nn.relu(), kernel_initializer: fc_initializer);
    });
    tf_with(tf.name_scope("fc-3"), scope =>
    {
        logits = tf.layers.dense(fc2_out, num_class, activation: null, kernel_initializer: fc_initializer);
        predictions = tf.argmax(logits, -1, output_type: tf.int32);
    });

    // ============= Loss and Accuracy =============
    tf_with(tf.name_scope("loss"), delegate
    {
        var y_one_hot = tf.one_hot(y, num_class);
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits: logits, labels: y_one_hot));
        var update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) as List<object>;
        tf_with(tf.control_dependencies(update_ops.Select(x => (Operation)x).ToArray()), delegate
        {
            var adam = tf.train.AdamOptimizer(learning_rate);
            adam.minimize(loss, global_step: global_step);
        });
    });
}
protected override void build(TensorShape input_shape)
{
    var ndims = input_shape.ndim;
    // Normalize negative axes to their positive counterparts.
    foreach (var (idx, x) in enumerate(axis))
        if (x < 0)
            axis[idx] = ndims + x;

    if (fused)
        if (Enumerable.SequenceEqual(axis, new int[] { 3 }))
            _data_format = "NHWC";

    var param_dtype = _dtype == TF_DataType.DtInvalid ? TF_DataType.TF_FLOAT : _dtype;
    var param_shape = new int[] { input_shape.dims[axis[0]] };

    if (scale)
        gamma = add_weight("gamma", param_shape,
            dtype: param_dtype,
            initializer: gamma_initializer,
            trainable: true);
    else
        throw new NotImplementedException("add_weight gamma");

    if (center)
        beta = add_weight("beta", param_shape,
            dtype: param_dtype,
            initializer: beta_initializer,
            trainable: true);
    else
        throw new NotImplementedException("add_weight beta");

    if (_scope != null)
    {
        // intentionally left empty in this port
    }

    moving_mean = (RefVariable)add_weight("moving_mean", param_shape,
        dtype: param_dtype,
        initializer: moving_mean_initializer,
        synchronization: VariableSynchronization.OnRead,
        trainable: false,
        aggregation: VariableAggregation.Mean);

    moving_variance = (RefVariable)add_weight("moving_variance", shape: param_shape,
        dtype: param_dtype,
        initializer: moving_variance_initializer,
        synchronization: VariableSynchronization.OnRead,
        trainable: false,
        aggregation: VariableAggregation.Mean);

    if (renorm)
        throw new NotImplementedException("build when renorm is true");

    built = true;
}
/// <summary>
/// Adds a new softmax and fully-connected layer for training and eval.
///
/// We need to retrain the top layer to identify our new classes, so this function
/// adds the right operations to the graph, along with some variables to hold the
/// weights, and then sets up all the gradients for the backward pass.
///
/// The set up for the softmax and fully-connected layers is based on:
/// https://www.tensorflow.org/tutorials/mnist/beginners/index.html
/// </summary>
/// <param name="class_count"></param>
/// <param name="final_tensor_name"></param>
/// <param name="bottleneck_tensor"></param>
/// <param name="quantize_layer"></param>
/// <param name="is_training"></param>
/// <returns></returns>
private (Operation, Tensor, Tensor, Tensor, Tensor) add_final_retrain_ops(int class_count, string final_tensor_name,
    Tensor bottleneck_tensor, bool quantize_layer, bool is_training)
{
    var (batch_size, bottleneck_tensor_size) = (bottleneck_tensor.TensorShape.dims[0], bottleneck_tensor.TensorShape.dims[1]);
    tf_with(tf.name_scope("input"), scope =>
    {
        bottleneck_input = tf.placeholder_with_default(
            bottleneck_tensor,
            shape: bottleneck_tensor.TensorShape.dims,
            name: "BottleneckInputPlaceholder");
        ground_truth_input = tf.placeholder(tf.int64, new TensorShape(batch_size), name: "GroundTruthInput");
    });

    // Organizing the following ops so they are easier to see in TensorBoard.
    string layer_name = "final_retrain_ops";
    Tensor logits = null;
    tf_with(tf.name_scope(layer_name), scope =>
    {
        RefVariable layer_weights = null;
        tf_with(tf.name_scope("weights"), delegate
        {
            var initial_value = tf.truncated_normal(new int[] { bottleneck_tensor_size, class_count }, stddev: 0.001f);
            layer_weights = tf.Variable(initial_value, name: "final_weights");
            variable_summaries(layer_weights);
        });

        RefVariable layer_biases = null;
        tf_with(tf.name_scope("biases"), delegate
        {
            layer_biases = tf.Variable(tf.zeros(new TensorShape(class_count)), name: "final_biases");
            variable_summaries(layer_biases);
        });

        tf_with(tf.name_scope("Wx_plus_b"), delegate
        {
            logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases;
            tf.summary.histogram("pre_activations", logits);
        });
    });

    final_tensor = tf.nn.softmax(logits, name: final_tensor_name);

    // The tf.contrib.quantize functions rewrite the graph in place for
    // quantization. The imported model graph has already been rewritten, so upon
    // calling these rewrites, only the newly added final layer will be
    // transformed.
    if (quantize_layer)
    {
        throw new NotImplementedException("quantize_layer");
        /*if (is_training)
            tf.contrib.quantize.create_training_graph();
        else
            tf.contrib.quantize.create_eval_graph();*/
    }

    tf.summary.histogram("activations", final_tensor);

    // If this is an eval graph, we don't need to add loss ops or an optimizer.
    if (!is_training)
        return (null, null, bottleneck_input, ground_truth_input, final_tensor);

    Tensor cross_entropy_mean = null;
    tf_with(tf.name_scope("cross_entropy"), delegate
    {
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels: ground_truth_input, logits: logits);
    });
    tf.summary.scalar("cross_entropy", cross_entropy_mean);

    tf_with(tf.name_scope("train"), delegate
    {
        var optimizer = tf.train.GradientDescentOptimizer(learning_rate);
        train_step = optimizer.minimize(cross_entropy_mean);
    });

    return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input, final_tensor);
}
public Graph BuildGraph()
{
    var graph = new Graph().as_default();

    tf_with(tf.name_scope("define_input"), scope =>
    {
        input_data = tf.placeholder(dtype: tf.float32, name: "input_data");
        label_sbbox = tf.placeholder(dtype: tf.float32, name: "label_sbbox");
        label_mbbox = tf.placeholder(dtype: tf.float32, name: "label_mbbox");
        label_lbbox = tf.placeholder(dtype: tf.float32, name: "label_lbbox");
        true_sbboxes = tf.placeholder(dtype: tf.float32, name: "sbboxes");
        true_mbboxes = tf.placeholder(dtype: tf.float32, name: "mbboxes");
        true_lbboxes = tf.placeholder(dtype: tf.float32, name: "lbboxes");
        trainable = tf.placeholder(dtype: tf.@bool, name: "training");
    });

    tf_with(tf.name_scope("define_loss"), scope =>
    {
        model = new YOLOv3(cfg, input_data, trainable);
        net_var = tf.global_variables();
        (giou_loss, conf_loss, prob_loss) = model.compute_loss(
            label_sbbox, label_mbbox, label_lbbox,
            true_sbboxes, true_mbboxes, true_lbboxes);
        loss = giou_loss + conf_loss + prob_loss;
    });

    // Linear warm-up followed by cosine decay of the learning rate.
    Tensor global_step_update = null;
    tf_with(tf.name_scope("learn_rate"), scope =>
    {
        global_step = tf.Variable(1.0, dtype: tf.float64, trainable: false, name: "global_step");
        var warmup_steps = tf.constant(warmup_periods * steps_per_period,
            dtype: tf.float64, name: "warmup_steps");
        var train_steps = tf.constant((first_stage_epochs + second_stage_epochs) * steps_per_period,
            dtype: tf.float64, name: "train_steps");
        learn_rate = tf.cond(
            pred: global_step < warmup_steps,
            true_fn: delegate { return global_step / warmup_steps * learn_rate_init; },
            false_fn: delegate
            {
                return learn_rate_end + 0.5 * (learn_rate_init - learn_rate_end) *
                    (1 + tf.cos((global_step - warmup_steps) / (train_steps - warmup_steps) * Math.PI));
            });
        global_step_update = tf.assign_add(global_step, 1.0f);
    });

    Operation moving_ave = null;
    tf_with(tf.name_scope("define_weight_decay"), scope =>
    {
        var emv = tf.train.ExponentialMovingAverage(moving_ave_decay);
        var vars = tf.trainable_variables().Select(x => (RefVariable)x).ToArray();
        moving_ave = emv.apply(vars);
    });

    // First-stage training ops are not wired up yet in this port.
    tf_with(tf.name_scope("define_first_stage_train"), scope => { });

    return graph;
}