public void GetQueryable()
{
    var service = new MemoryCollectionFactory();

    // Request a string collection twice and add one item through each reference.
    var result1 = service.GetOrCreateCollection<string>();
    result1.Add("Foo");

    var result2 = service.GetOrCreateCollection<string>();
    result2.Add("Bar");

    // If the factory returns the same shared collection for a given element
    // type, both references see both items and the counts match.
    Assert.Equal(result1.Count, result2.Count);
}
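For context, here is a minimal sketch of what a type-keyed GetOrCreateCollection<T> might look like. The real MemoryCollectionFactory is not shown in this section, so the cache-one-collection-per-element-type behavior, the class name MemoryCollectionFactorySketch, and the IList<T> return type are all assumptions inferred from the test above.

using System;
using System.Collections.Generic;

// Hypothetical sketch only; the actual MemoryCollectionFactory may differ.
public class MemoryCollectionFactorySketch
{
    // One cached collection per element type.
    private readonly Dictionary<Type, object> m_rgCollections = new Dictionary<Type, object>();

    public IList<T> GetOrCreateCollection<T>()
    {
        object col;
        if (!m_rgCollections.TryGetValue(typeof(T), out col))
        {
            // First request for this element type: create and cache the collection.
            col = new List<T>();
            m_rgCollections.Add(typeof(T), col);
        }

        // Subsequent requests return the same cached instance.
        return (IList<T>)col;
    }
}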
/// <summary>
/// The Run method provides the main loop that performs the following steps:
/// 1.) get state
/// 2.) build experience
/// 3.) create policy gradients
/// 4.) train on experiences
/// </summary>
/// <param name="phase">Specifies the phase.</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the training step to take, if any. This is only used when debugging.</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);
    int nIteration = 1;
    double dfRunningReward = 0;
    double dfEpisodeReward = 0;
    int nEpisode = 0;
    bool bDifferent = false;

    // Get the initial state.
    StateBase state = getData(phase, -1, -1);

    // Preprocess the observation.
    SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

    // Set the initial target model to the current model.
    m_brain.UpdateTargetModel();

    while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
    {
        // Forward the policy network and sample an action.
        int action = m_brain.act(x, state.Clip, state.ActionCount);

        // Take the next step using the action.
        StateBase state_next = getData(phase, action, nIteration);

        // Preprocess the next observation.
        SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
        if (!bDifferent)
        {
            m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");
        }

        // Build up episode memory, using the reward for taking the action.
        iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
        dfEpisodeReward += state_next.Reward;

        // Do the training once enough experiences have accumulated to fill a batch.
        if (iMemory.Count > m_brain.BatchSize)
        {
            double dfBeta = beta_by_frame(nIteration);
            MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
            m_brain.Train(nIteration, rgSamples, state.ActionCount);
            iMemory.Update(rgSamples);

            // Periodically copy the current model weights into the target model.
            if (nIteration % m_nUpdateTargetFreq == 0)
            {
                m_brain.UpdateTargetModel();
            }
        }

        if (state_next.Done)
        {
            // Update the running reward (an exponential moving average of episode rewards).
            dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

            nEpisode++;
            updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

            // Reset to a new initial state for the next episode.
            state = getData(phase, -1, -1);
            x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
            dfEpisodeReward = 0;
        }
        else
        {
            state = state_next;
            x = x_next;
        }

        nIteration++;
    }

    iMemory.CleanUp();
}
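Run calls beta_by_frame to obtain the importance-sampling exponent passed to GetSamples, but the helper itself is not shown in this section. A common choice, as in standard prioritized experience replay, is a linear ramp from a starting beta toward 1.0 so that the sampling-bias correction becomes exact as training converges. The following is a minimal sketch under that assumption; the field names m_fBetaStart and m_nBetaFrames, and their values, are hypothetical.

// Hypothetical sketch: linearly anneals the prioritized-replay
// importance-sampling exponent from m_fBetaStart toward 1.0 over
// m_nBetaFrames iterations. The actual beta_by_frame implementation
// is not shown above and may differ.
double m_fBetaStart = 0.4;
int m_nBetaFrames = 1000;

private double beta_by_frame(int nFrame)
{
    // Clamp at 1.0 once the annealing window has elapsed.
    return Math.Min(1.0, m_fBetaStart + nFrame * (1.0 - m_fBetaStart) / m_nBetaFrames);
}

The returned beta weights each sampled transition's importance-sampling correction inside GetSamples; annealing it to 1.0 removes the bias introduced by priority-based sampling by the end of training.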