Пример #1
0
    /// <summary>
    /// Query actions' probabilities based on curren states. The first dimension of the array must be batch dimension. Note that it is the normalized log probability
    /// </summary>
    public virtual float[,] EvaluateProbability(float[,] vectorObservation, float[,] actions, List <float[, , , ]> visualObservation, List <float[, ]> actionsMask = null)
    {
        Debug.Assert(mode == Mode.PPO, "This method is for PPO mode only");
        Debug.Assert(TrainingEnabled == true, "The model needs to initalized with Training enabled to use EvaluateProbability()");

        List <Array> inputLists = new List <Array>();

        if (HasVectorObservation)
        {
            Debug.Assert(vectorObservation != null, "Must Have vector observation inputs!");
            inputLists.Add(vectorObservation);
        }
        if (HasVisualObservation)
        {
            Debug.Assert(visualObservation != null, "Must Have visual observation inputs!");
            inputLists.AddRange(visualObservation);
        }

        var actionProbs = new float[actions.GetLength(0), ActionSpace == SpaceType.continuous ? actions.GetLength(1) : 1];

        if (ActionSpace == SpaceType.continuous)
        {
            inputLists.Add(actions);
            var result = ActionProbabilityFunction.Call(inputLists);
            actionProbs = ((float[, ])result[0].eval());
        }
        else if (ActionSpace == SpaceType.discrete)
        {
            List <float[, ]> masks = actionsMask;
            int batchSize          = vectorObservation.GetLength(0);
            int branchSize         = ActionSizes.Length;
            //create all 1 mask if the input mask is null.
            if (masks == null)
            {
                masks = CreateDummyMasks(ActionSizes, batchSize);
            }

            inputLists.AddRange(masks);

            var result = ActionFunction.Call(inputLists);
            //get the log probabilities
            actionProbs = new float[batchSize, branchSize];
            for (int b = 0; b < branchSize; ++b)
            {
                var tempProbs = ((float[, ])result[b + 1].eval());
                int actSize   = ActionSizes[b];
                for (int i = 0; i < batchSize; ++i)
                {
                    actionProbs[i, b] = tempProbs[i, Mathf.RoundToInt(actions[i, b])];
                }
            }
        }

        return(actionProbs);
    }
Пример #2
0
    /// <summary>
    /// Query actions' probabilities based on curren states. The first dimension of the array must be batch dimension
    /// </summary>
    public virtual float[,] EvaluateProbability(float[,] vectorObservation, float[,] actions, List <float[, , , ]> visualObservation)
    {
        Debug.Assert(mode == Mode.PPO, "This method is for PPO mode only");
        Debug.Assert(TrainingEnabled == true, "The model needs to initalized with Training enabled to use EvaluateProbability()");

        List <Array> inputLists = new List <Array>();

        if (HasVectorObservation)
        {
            Debug.Assert(vectorObservation != null, "Must Have vector observation inputs!");
            inputLists.Add(vectorObservation);
        }
        if (HasVisualObservation)
        {
            Debug.Assert(visualObservation != null, "Must Have visual observation inputs!");
            inputLists.AddRange(visualObservation);
        }

        var actionProbs = new float[actions.GetLength(0), ActionSpace == SpaceType.continuous ? actions.GetLength(1) : 1];

        if (ActionSpace == SpaceType.continuous)
        {
            inputLists.Add(actions);
            var result = ActionProbabilityFunction.Call(inputLists);
            actionProbs = ((float[, ])result[0].eval());
        }
        else if (ActionSpace == SpaceType.discrete)
        {
            var result = ActionFunction.Call(inputLists);

            var outputAction = ((float[, ])result[0].eval());
            for (int j = 0; j < outputAction.GetLength(0); ++j)
            {
                actionProbs[j, 0] = outputAction.GetRow(j)[Mathf.RoundToInt(actions[j, 0])];
            }
        }

        return(actionProbs);
    }