Exemplos de MyCaffe.trainers.pg.st MemoryCollection em C# (CSharp)

Linguagem de programação: C# (CSharp)

Espaço para nome / nome do pacote: MyCaffe.trainers.pg.st

Classe / Tipo: MemoryCollection

Exemplos em hotexamples.com: 2

MyCaffe.trainers.pg.st MemoryCollection em C# (CSharp) - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de MyCaffe.trainers.pg.st.MemoryCollection em C# (CSharp) extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

Add(1)

Clear(1)

GetActionOneHotVectors(1)

GetActionProbabilities(1)

GetClip(1)

GetData(1)

GetDiscountedRewards(1)

Métodos Frequentes

Add (1)

Clear (1)

GetActionOneHotVectors (1)

GetActionProbabilities (1)

GetClip (1)

GetData (1)

GetDiscountedRewards (1)

Relacionados

MD5C

BOPDS_CommonBlock

OptionsV1Window

Pathfinding.StartEndModifier

NativeLibraryManager

IObjectStorageService

ConfigurationItemAttribute

AreaInstance8a

ID3DBlob**

Game

Relacionados em outras linguagens

Restore (PHP)

ForumViewCount (PHP)

aTmp1 (C++)

parse_camlrunparam (C++)

YesNo (Go)

Run (Go)

SystemDate (Java)

Mirrorable (Java)

gain_ratio (Python)

Config (Python)

Exemplo n.º 1

0

Exibir arquivo

/// <summary>
/// Reshapes the internal work blobs so that each holds one row per item in the memory collection
/// and one channel per action probability produced by the network.
/// </summary>
/// <param name="col">Specifies the memory collection; its first item is inspected, so it must contain at least one item.</param>
/// <returns>The largest channel count found among the non-loss output blobs (never less than 1) is returned.</returns>
/// <exception cref="Exception">Thrown when the network exposes no non-loss output blob.</exception>
public int Reshape(MemoryCollection col)
{
    int nNum = col.Count;

    // Dimensions of the first memory item. These values are not consumed further down in this
    // method; they are kept so the first item is still dereferenced exactly as before.
    int nChannels = col[0].Data.Channels;
    int nHeight = col[0].Data.Height;
    // Fixed copy-paste error: the width was previously read from Data.Height.
    int nWidth = col[0].Data.Width;

    int nActionProbs = 1;
    int nFound = 0;

    // The widest non-loss output determines the number of action probabilities.
    for (int i = 0; i < m_net.output_blobs.Count; i++)
    {
        if (m_net.output_blobs[i].type != Blob<T>.BLOB_TYPE.LOSS)
        {
            int nCh = m_net.output_blobs[i].channels;
            nActionProbs = Math.Max(nCh, nActionProbs);
            nFound++;
        }
    }

    if (nFound == 0)
    {
        throw new Exception("Could not find a non-loss output!  Your model should output the loss and the action probabilities.");
    }

    m_blobDiscountedR.Reshape(nNum, nActionProbs, 1, 1);
    m_blobPolicyGradient.Reshape(nNum, nActionProbs, 1, 1);
    m_blobActionOneHot.Reshape(nNum, nActionProbs, 1, 1);
    m_blobDiscountedR1.Reshape(nNum, nActionProbs, 1, 1);
    m_blobPolicyGradient1.Reshape(nNum, nActionProbs, 1, 1);
    m_blobActionOneHot1.Reshape(nNum, nActionProbs, 1, 1);
    m_blobLoss.Reshape(1, 1, 1, 1);

    return nActionProbs;
}

Exemplo n.º 2

0

Exibir arquivo

/// <summary>
/// Runs the main 'actor' loop, which repeats the following steps until cancelled or the
/// requested iteration count is reached:
///   1.) get state
///   2.) build experience
///   3.) create policy gradients
///   4.) train on experiences
/// </summary>
/// <param name="phase">Specifies the phase.</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the training step to take, if any.  This is only used when debugging.</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    MemoryCollection rgEpisodeMemory = new MemoryCollection();
    bool bTraining = (phase == Phase.TRAIN);
    double? dfRunningReward = null;
    double dfEpisodeReward = 0;
    int nEpisode = 0;
    int nIteration = 0;

    StateBase state = getData(phase, -1);

    while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
    {
        // Preprocess the current observation.
        SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput);

        // Forward the policy network and sample an action.
        float[] rgfAprob;
        int nAction = m_brain.act(x, state.Clip, out rgfAprob);

        if (step == TRAIN_STEP.FORWARD)
        {
            return;
        }

        // Apply the action to obtain the next state and its reward.
        StateBase stateNext = getData(phase, nAction);
        dfEpisodeReward += stateNext.Reward;

        // While training, every step is recorded along with the reward earned by its action.
        if (bTraining)
        {
            rgEpisodeMemory.Add(new MemoryItem(state, x, nAction, rgfAprob, (float)stateNext.Reward));
        }

        // Episode still in progress: advance to the next state and keep going.
        if (!stateNext.Done)
        {
            state = stateNext;

            // Outside of training, each step counts as an iteration.
            if (!bTraining)
            {
                nIteration++;
            }

            continue;
        }

        // An episode has finished.
        nEpisode++;

        if (bTraining)
        {
            // During training, an iteration is counted per completed episode.
            nIteration++;

            m_brain.Reshape(rgEpisodeMemory);

            // Discounted rewards, computed backwards through time.
            m_brain.SetDiscountedR(rgEpisodeMemory.GetDiscountedRewards(m_fGamma, m_bAllowDiscountReset));

            // The action probabilities are used to calculate the initial gradient within the loss function.
            m_brain.SetActionProbabilities(rgEpisodeMemory.GetActionProbabilities());

            // Action one-hot vectors.  With Softmax each entry is a one-hot vector over the action set
            // (e.g. 3 actions with action 0 set gives <1,0,0>); with a binary probability (e.g. Sigmoid)
            // each entry holds a single element set to the action value itself.
            m_brain.SetActionOneHotVectors(rgEpisodeMemory.GetActionOneHotVectors());

            // Train for one iteration, which triggers the loss function.
            List<Datum> rgData = rgEpisodeMemory.GetData();
            List<Datum> rgClip = rgEpisodeMemory.GetClip();
            m_brain.SetData(rgData, rgClip);
            m_brain.Train(nIteration, step);
        }

        // Update the running reward: seeded by the first episode, then blended 99% / 1%.
        dfRunningReward = dfRunningReward.HasValue
            ? dfRunningReward.Value * 0.99 + dfEpisodeReward * 0.01
            : dfEpisodeReward;

        updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
        dfEpisodeReward = 0;

        // Start the next episode from a fresh state.
        state = getData(phase, -1);

        if (bTraining)
        {
            rgEpisodeMemory.Clear();

            // When single-stepping (debugging), stop after the first trained episode.
            if (step != TRAIN_STEP.NONE)
            {
                return;
            }
        }
        else
        {
            nIteration++;
        }
    }
}