Example #1
0
        /// <summary>
        /// Requests a string collection from the same factory twice, adds one item through
        /// each returned handle, and verifies that both handles report the same item count.
        /// </summary>
        public void GetQueryable()
        {
            // Arrange
            var factory = new MemoryCollectionFactory();

            // Act: add one item through each handle obtained from the same factory.
            var firstHandle = factory.GetOrCreateCollection<string>();
            firstHandle.Add("Foo");

            var secondHandle = factory.GetOrCreateCollection<string>();
            secondHandle.Add("Bar");

            // Assert: the counts can only match if the item added through the first handle
            // is visible through the second one, and vice versa.
            Assert.Equal(firstHandle.Count, secondHandle.Count);
        }
Example #2
0
        /// <summary>
        /// The Run method provides the main loop that performs the following steps:
        /// 1.) get the current state and preprocess it
        /// 2.) ask the brain for an action and step to the next state
        /// 3.) add the resulting experience to the memory collection
        /// 4.) train on sampled experiences once the memory holds more than one batch
        /// 5.) update the target model every m_nUpdateTargetFreq iterations while training
        /// </summary>
        /// <remarks>
        /// The memory collection is cleaned up when the loop ends, including when the loop
        /// is left because of an exception.
        /// </remarks>
        /// <param name="phase">Specifies the phase.</param>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type that <paramref name="nN"/> is measured in.</param>
        /// <param name="step">Specifies the training step to take, if any.  This is only used when debugging (it is not referenced by this method body).</param>
        public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);

            // Everything after the memory is created runs inside try/finally so that
            // CleanUp() is not skipped when getData, Preprocess or Train throws.
            try
            {
                int    nIteration         = 1;
                double dfRunningReward    = 0;
                double dfEpisodeReward    = 0;
                int    nEpisode           = 0;
                bool   bDifferent         = false;

                StateBase state = getData(phase, -1, -1);
                // Preprocess the observation.
                SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

                // Set the initial target model to the current model.
                m_brain.UpdateTargetModel();

                // Loop until cancelled or until the requested iteration/episode limit is reached.
                while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
                {
                    // Ask the brain for the action to take from the current observation.
                    int action = m_brain.act(x, state.Clip, state.ActionCount);

                    // Take the next step using the action
                    StateBase state_next = getData(phase, action, nIteration);

                    // Preprocess the next observation.
                    SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
                    if (!bDifferent)
                    {
                        m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");
                    }

                    // Build up episode memory, using reward for taking the action.
                    iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
                    dfEpisodeReward += state_next.Reward;

                    // Do the training, but only once the memory holds more than one batch of items.
                    if (iMemory.Count > m_brain.BatchSize)
                    {
                        double           dfBeta    = beta_by_frame(nIteration);
                        MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
                        m_brain.Train(nIteration, rgSamples, state.ActionCount);

                        // Hand the trained samples back to the memory collection.
                        iMemory.Update(rgSamples);

                        if (nIteration % m_nUpdateTargetFreq == 0)
                        {
                            m_brain.UpdateTargetModel();
                        }
                    }

                    if (state_next.Done)
                    {
                        // Update the running reward as an exponential moving average of the episode rewards.
                        dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                        nEpisode++;
                        updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

                        // Start the next episode with the same calls used for the initial state.
                        state           = getData(phase, -1, -1);
                        x               = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
                        dfEpisodeReward = 0;
                    }
                    else
                    {
                        // Continue the episode from the state just reached.
                        state = state_next;
                        x     = x_next;
                    }

                    nIteration++;
                }
            }
            finally
            {
                iMemory.CleanUp();
            }
        }