TrainingStats, RLDT C# (CSharp) Exemples de code

Exemple #1

0

Afficher le fichier

Fichier : Policy.cs Projet : zeta1999/Reinforcement-Learning-Based-Decision-Tree

        /// <summary>
        /// Trains the policy on a single training datavector. The current decision tree is cleared, the root
        /// state is created if the state space is empty, and the recursive learning process is started from
        /// the root state.
        /// </summary>
        /// <remarks>
        /// Features whose value is null are removed via <c>dataVector.Features.RemoveAll</c> before training,
        /// so presumably the caller's datavector is modified in place. If no features remain, or the label or
        /// its value is null, nothing is learned and the decision tree is left untouched.
        /// </remarks>
        /// <param name="dataVector">A sample data point to learn from with features, values, relative rewards, and correct classification label.</param>
        /// <returns>
        /// The statistics of the learning process, or a freshly constructed "TrainingStats" when the datavector
        /// is rejected. See "TrainingStats" class for more details.
        /// </returns>
        public TrainingStats Learn(DataVectorTraining dataVector)
        {
            lock (processLock)
            {
                //Drop every feature that carries no value
                dataVector.Features.RemoveAll(feature => feature.Value == null);

                //Reject datavectors that have nothing left to learn from
                bool hasNoFeatures = dataVector.Features.Count == 0;
                if (hasNoFeatures || dataVector.Label == null || dataVector.Label.Value == null)
                {
                    return new TrainingStats();
                }

                //The decision tree is cleared before any state is touched
                DecisionTree = null;

                //Collects the statistics of this training pass
                TrainingStats stats = new TrainingStats();

                //An empty state space means the root state has not been created yet
                if (StateSpace.Count == 0)
                {
                    State firstState = new State(dataVector);
                    AddState(firstState, stats);
                }

                //0 is the hashcode for a state with no features, i.e. the root state
                const int rootHashCode = 0;
                State rootState = StateSpace[rootHashCode];
                Learn(rootState, dataVector, 0, stats);

                //Record the size of the state space after training
                stats.StatesTotal = StateSpace.Count;

                return stats;
            }
        }

Exemple #2

0

Afficher le fichier

Fichier : Policy.cs Projet : zeta1999/Reinforcement-Learning-Based-Decision-Tree

 //Methods - Training
 /// <summary>
 /// Adds a given state to the state space, keyed by the state's hash code, and counts it as a created
 /// state in the training statistics.
 /// </summary>
 /// <param name="theState">The state to add; its <c>GetHashCode()</c> result is used as the key.</param>
 /// <param name="trainingDetails">The training statistics whose <c>StatesCreated</c> counter is incremented.</param>
 private void AddState(State theState, TrainingStats trainingDetails)
 {
     lock (processLock)
     {
         //The hash code identifies the state inside the state space
         int stateKey = theState.GetHashCode();
         StateSpace.Add(stateKey, theState);

         //Statistics
         trainingDetails.StatesCreated++;
     }
 }

Exemple #3

0

Afficher le fichier

Fichier : Policy.cs Projet : zeta1999/Reinforcement-Learning-Based-Decision-Tree

        /// <summary>
        /// Updates all queries in other states that lead to this state.
        /// </summary>
        /// <remarks>
        /// For every feature of <paramref name="nextState"/>, the state that differs only by lacking that feature
        /// is looked up in the state space. If it exists, its query for that feature is adjusted. If it does not
        /// exist, it is created and added to the state space, and no query adjustment is made for it in this call.
        /// </remarks>
        /// <param name="nextState">The state that comes after the query is performed.</param>
        /// <param name="dataVector">The relevant datavector for training.</param>
        /// <param name="trainingDetails">Statistics of the learning process; handed to AddState whenever a missing state is created.</param>
        private void ParallelPathsUpdate(State nextState, DataVectorTraining dataVector, TrainingStats trainingDetails)
        {
            lock (processLock)
            {
                //Get current expected reward of label
                double nextStateLabelReward = nextState.Labels[dataVector.Label];

                //Adjust queries in states that point to this "nextState".
                //The features are copied into a list first; that list is iterated and also serves as the
                //template for the feature list of any state that has to be created below.
                List<FeatureValuePair> nextStateFeatures = nextState.Features.ToList();
                foreach (FeatureValuePair theFeature in nextStateFeatures)
                {
                    //Generate hashcode of a state that is missing this feature. i.e. A state that is only different by one feature, so it could lead to this state.
                    int stateHashcode = nextState.GetHashCodeWithout(theFeature);

                    //Single lookup: fetch the state if it exists
                    State prevState;
                    if (!StateSpace.TryGetValue(stateHashcode, out prevState))
                    {
                        //The state does not exist yet: build it from a copy of the feature list without this feature
                        List<FeatureValuePair> prevStateFeatures = nextStateFeatures.ToList();
                        prevStateFeatures.Remove(theFeature);

                        //Create and register the new state; its query is not adjusted in this call
                        prevState = new State(prevStateFeatures, dataVector);
                        AddState(prevState, trainingDetails);
                        continue;
                    }

                    //Create the query to update
                    Query theQuery = new Query(theFeature, dataVector.Label);

                    //Get reward from datavector for querying this feature
                    double featureReward = dataVector[theFeature.Name].Importance;

                    //Adjust the query
                    prevState.AdjustQuery(theQuery, nextStateLabelReward, featureReward, DiscountFactor);
                }
            }
        }

Exemple #4

0

Afficher le fichier

Fichier : Policy.cs Projet : zeta1999/Reinforcement-Learning-Based-Decision-Tree

        /// <summary>
        /// A recursive learning process. A state is updated and analysed using a training datavector.
        /// A query is chosen for the current state (randomly or the best one); if there is one, the process
        /// recurses into the state that the query leads to and then updates the expected reward of the
        /// query (or of all queries leading to that next state).
        /// </summary>
        /// <remarks>
        /// The labels of the current state are adjusted when no query is followed, or always when
        /// <c>ParallelReportUpdatesEnabled</c> is set. The recursion ends when no query is recommended or when
        /// <paramref name="totalQueries"/> exceeds <c>QueriesLimit</c>.
        /// </remarks>
        /// <param name="currentState">The state to be updated and analysed.</param>
        /// <param name="dataVector">The list of features, rewards and label to update with</param>
        /// <param name="totalQueries">The number of queries already followed for this datavector; 0 for the root state.</param>
        /// <param name="trainingDetails">Provides statistics of the learning process.</param>
        private void Learn(State currentState, DataVectorTraining dataVector, int totalQueries, TrainingStats trainingDetails)
        {
            lock (processLock)
            {
                //Choose random or best query. A random query is picked with probability ExplorationRate.
                Query recommendedQuery;
                if (rand.NextDouble() < ExplorationRate)
                {
                    //Pick random query
                    recommendedQuery = currentState.GetRandomQuery(dataVector, rand);
                }
                else
                {
                    //Find best query
                    recommendedQuery = currentState.GetBestQuery(dataVector);
                }

                //Check total queries
                //NOTE(review): with '>' a query is still followed when totalQueries equals QueriesLimit, so up to
                //QueriesLimit + 1 queries can be followed per datavector - confirm this is the intended limit.
                if (totalQueries > QueriesLimit)
                {
                    recommendedQuery = null;
                }

                //Adjust expected reward of labels
                if (recommendedQuery == null || ParallelReportUpdatesEnabled)
                {
                    currentState.AdjustLabels(dataVector.Label);
                }

                //If no query, then end training for this datapoint
                if (recommendedQuery == null)
                {
                    return;
                }

                //Search for next state (single lookup), or create it
                int nextHashCode = currentState.GetHashCodeWith(recommendedQuery.Feature);
                State nextState;
                if (!StateSpace.TryGetValue(nextHashCode, out nextState))
                {
                    //Create a new state
                    nextState = new State(currentState, recommendedQuery.Feature, dataVector);
                    AddState(nextState, trainingDetails);
                }

                //Process next state, to get adjustment for selected query
                Learn(nextState, dataVector, totalQueries + 1, trainingDetails);

                //Statistics: the query is counted once the recursion for it has returned
                trainingDetails.QueriesTotal++;

                //Update State's Query's expected reward
                if (ParallelQueryUpdatesEnabled)
                {
                    //Update all queries that lead to this next state
                    ParallelPathsUpdate(nextState, dataVector, trainingDetails);
                }
                else
                {
                    //Update just current state's query
                    double featureReward        = dataVector[recommendedQuery.Feature.Name].Importance;
                    double nextStateLabelReward = nextState.Labels[dataVector.Label];
                    currentState.AdjustQuery(recommendedQuery, nextStateLabelReward, featureReward, DiscountFactor);
                }
            }
        }

C# (CSharp) RLDT TrainingStats Exemples