static void Main(string[] args)
        {
            // Configuration parameters for the whole setup
            var seed  = 42;
            var gamma = 0.99;  // Discount factor for future rewards
            var max_steps_per_episode = 10000;
            // var env = gym.make("CartPole-v0");  // Create the environment
            CartPoleEnv env = new CartPoleEnv(WinFormEnvViewer.Factory);  // Create the environment

            env.Seed(seed);
            // var eps = np.finfo(np.float32).eps.item();  // Smallest number such that 1.0 + eps != 1.0
            var eps = 1e-5;  // Small constant for numerical stability (stand-in for the float32 machine epsilon above)

            /*
             * Implement the Actor-Critic network
             *
             * This network learns two functions:
             *
             * 1. Actor: takes the state of our environment as input and returns a
             *    probability value for each action in its action space.
             * 2. Critic: takes the state of our environment as input and returns
             *    an estimate of the total rewards in the future.
             *
             * In our implementation, they share the initial layer.
             */

            var num_inputs  = 4;
            var num_actions = 2;
            var num_hidden  = 128;

            LayersApi layers = new LayersApi();
            var       inputs = layers.Input(shape: (num_inputs));
            var       common = layers.Dense(num_hidden, activation: "relu").Apply(inputs);
            var       action = layers.Dense(num_actions, activation: "softmax").Apply(common);
            var       critic = layers.Dense(1).Apply(common);

            Model model = keras.Model(inputs: inputs, outputs: (action, critic));
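            // The model maps a (1, num_inputs) state to two outputs:
            //   action: a (1, num_actions) softmax distribution over actions,
            //   critic: a (1, 1) estimate of the expected future reward.
            // model.summary();  // Optionally print the layer structure (if supported by your Keras build).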

            /*
             * Train
             */

            var optimizer  = keras.optimizers.Adam(learning_rate: 0.01f);
            var huber_loss = keras.losses.Huber();
            // The log-probabilities and critic values are kept as tensors so the
            // gradient tape can backpropagate through them later.
            var    action_probs_history = new List<Tensor>();
            var    critic_value_history = new List<Tensor>();
            var    rewards_history      = new List<double>();
            double running_reward       = 0;
            var    episode_count        = 0;

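            // Training loop: each iteration plays one episode under a gradient tape,
            // computes discounted returns, builds the actor and critic losses, and
            // applies a single gradient update to the shared network.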
            while (true)  // Run until solved
            {
                Program.state = env.Reset();
                double episode_reward = 0;
                using (var tape = tf.GradientTape())
                {
                    for (int timestep = 1; timestep < max_steps_per_episode; timestep++)
                    {
                        // env.Render();  // Uncommenting this call shows the agent's
                        // attempts in a pop-up window.

                        Program.state = tf.convert_to_tensor(Program.state);
                        Program.state = tf.expand_dims(Program.state, 0);
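                        // The observation is now a (1, num_inputs) tensor:
                        // expand_dims adds the batch dimension the model expects.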

                        // Predict action probabilities and estimated future rewards
                        // from environment state
                        // var (action_probs, critic_value) = model.Apply(Program.state);
                        var pred_result = model.Apply(tf.cast(Program.state, tf.float32));

                        var action_probs = pred_result[0][0];
                        var critic_value = pred_result[1][0][0];

                        critic_value_history.Add(critic_value);

                        // Flatten the (1, num_actions) probability tensor for sampling
                        NDArray probabilities = np.squeeze(action_probs.numpy());
                        // Console.WriteLine(probabilities);  // Uncomment to inspect the predicted distribution

                        // Sample an action from the action probability distribution.
                        // NOTE: the conversion to double[] below assumes an np.random.choice
                        // overload taking `probabilities` as a double[]; adjust to your NumSharp version.
                        NDArray chosen_action = np.random.choice(num_actions, probabilities: probabilities.astype(np.float64).ToArray<double>());
                        action_probs_history.Add(tf.math.log(action_probs[0, chosen_action]));

                        // Apply the sampled action in our environment
                        var (observation, reward, done, _) = env.Step(chosen_action);
                        Program.state = observation;  // the next iteration acts on the new observation
                        rewards_history.Add(reward);
                        episode_reward += reward;

                        if (done)
                        {
                            break;
                        }
                    }
                    // Update running reward to check condition for solving
                    running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward;
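                    // running_reward is an exponential moving average of episode rewards;
                    // with a weight of 0.05 it roughly tracks the last ~20 episodes.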

                    // Calculate expected value from rewards
                    // - At each timestep what was the total reward received after that timestep
                    // - Rewards in the past are discounted by multiplying them with gamma
                    // - These are the labels for our critic
                    var    returns        = new List<double>();
                    double discounted_sum = 0;

                    // Walk the rewards from last to first so each step accumulates
                    // the discounted sum of everything that came after it.
                    foreach (double r in Enumerable.Reverse(rewards_history))
                    {
                        discounted_sum = r + gamma * discounted_sum;
                        returns.Insert(0, discounted_sum);
                    }
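                    // Example: with gamma = 0.99 and rewards [1, 1, 1] this yields
                    // returns [1 + 0.99 * 1.99, 1 + 0.99 * 1, 1] = [2.9701, 1.99, 1.0].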

                    // Normalize the returns to zero mean and unit standard deviation
                    // so the advantage estimates keep a consistent scale across episodes.
                    var returns_mean = returns.Average();
                    var returns_std  = Math.Sqrt(returns.Select(r => (r - returns_mean) * (r - returns_mean)).Average());
                    returns = returns.Select(r => (r - returns_mean) / (returns_std + eps)).ToList();

                    // Calculating loss values to update our network
                    var actor_losses  = new List<Tensor>();
                    var critic_losses = new List<Tensor>();
                    for (int i = 0; i < action_probs_history.Count; i++)
                    {
                        var log_prob = action_probs_history[i];
                        var value    = critic_value_history[i];
                        var ret      = tf.constant((float)returns[i]);
                        // At this point in history, the critic estimated that we would get a
                        // total reward = `value` in the future. We took an action with log probability
                        // of `log_prob` and ended up receiving a total reward = `ret`.
                        // The actor must be updated so that it predicts an action that leads to
                        // high rewards (compared to the critic's estimate) with high probability.
                        var diff = ret - value;
                        actor_losses.Add(-log_prob * diff);  // actor loss

                        // The critic must be updated so that it predicts a better estimate of
                        // the future rewards. Huber loss is quadratic for small errors and
                        // linear for large ones, which keeps the critic update robust.
                        critic_losses.Add(
                            huber_loss.Call(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
                            );
                    }

                    // Backpropagation: sum the losses into a single scalar tensor so the
                    // tape can differentiate it with respect to the model's trainable weights.
                    var loss_value = actor_losses.Concat(critic_losses).Aggregate((a, b) => a + b);
                    var grads      = tape.gradient(loss_value, model.trainable_variables);
                    optimizer.apply_gradients(zip(grads, model.trainable_variables.Select(x => x as ResourceVariable)));
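                    // zip pairs each gradient with the variable it belongs to before the
                    // optimizer applies the update; gradients and variables must stay in the same order.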

                    // Clear the loss and reward history
                    action_probs_history.Clear();
                    critic_value_history.Clear();
                    rewards_history.Clear();
                }
                // Log details
                episode_count += 1;
                if (episode_count % 10 == 0)
                {
                    var template = String.Format("running reward: {0} at episode {1}", running_reward, episode_count);
                    Console.WriteLine(template);
                }
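                // CartPole-v0 counts as solved when the average reward reaches 195
                // over 100 consecutive episodes; the running_reward average is used
                // here as a practical stand-in for that criterion.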

                if (running_reward > 195)  // Condition to consider the task solved
                {
                    Console.WriteLine(String.Format("Solved at episode {0}!", episode_count));
                    break;
                }
            }

            /*
             * Visualizations
             *
             * In early stages of training:
             * ![Imgur](https://i.imgur.com/5gCs5kH.gif)
             *
             * In later stages of training:
             * ![Imgur](https://i.imgur.com/5ziiZUD.gif)
             */
        }