static void AddSLGO()
    {
        var obj1 = new GameObject("LearningModel_SL");

        obj1.AddComponent <SupervisedLearningModel>();
        var obj2 = new GameObject("Trainer_SL");

        obj2.AddComponent <TrainerMimic>();

        var obj3 = new GameObject("SL_Learning");

        obj1.transform.parent = obj3.transform;
        obj2.transform.parent = obj3.transform;

        //try to create parameter assets
        SupervisedLearningNetworkSimple network = null;
        TrainerParamsMimic trainerParam         = null;

        CreateAssets <TrainerParamsMimic, SupervisedLearningNetworkSimple>("TrainerParamSL_" + obj1.scene.name + ".asset",
                                                                           "NetworkSL_" + obj1.scene.name + ".asset",
                                                                           out trainerParam, out network);
        network.hiddenLayers = new List <UnityNetwork.SimpleDenseLayerDef>();
        network.hiddenLayers.Add(new UnityNetwork.SimpleDenseLayerDef());

        var trainer = obj2.GetComponent <TrainerMimic>();

        trainer.modelRef                 = obj1.GetComponent <SupervisedLearningModel>();
        trainer.parameters               = trainerParam;
        trainer.checkpointPath           = checkpointPath;
        trainer.checkpointFileName       = "Checkpoint_" + obj1.scene.name + ".bytes";
        trainer.trainingDataSaveFileName = "Collected_SL_Data_" + obj1.scene.name + ".bytes";

        ((SupervisedLearningModel)trainer.modelRef).network = network;
    }
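For context, a scene-setup helper like AddSLGO is usually wired to the Unity editor menu. Below is a minimal sketch of such wiring, assuming AddSLGO is reachable from an editor class; the menu path and class name are illustrative, not from the original source:

    using UnityEditor;

    public static class SLSetupMenu
    {
        //hypothetical menu entry; AddSLGO stands in for the helper shown above
        [MenuItem("GameObject/ML/Create Supervised Learning Setup")]
        private static void CreateSLSetup()
        {
            //AddSLGO();  //call the setup helper from the example above
        }
    }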
Example #2
    public override void Initialize()
    {
        modelSL = modelRef as ISupervisedLearningModel;
        Debug.Assert(modelSL != null, "Please assign an ISupervisedLearningModel to modelRef");
        Debug.Assert(BrainToTrain != null, "Brain cannot be null");
        parametersMimic = parameters as TrainerParamsMimic;
        Debug.Assert(parametersMimic != null, "Please specify TrainerParamsMimic as the trainer parameters");
        stats = new StatsLogger();
        modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

        var brainParameters = BrainToTrain.brainParameters;

        //initialize data buffer
        Debug.Assert(brainParameters.vectorActionSize.Length <= 1, "Action branching is not supported yet");
        List <DataBuffer.DataInfo> allBufferData = new List <DataBuffer.DataInfo>()
        {
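            //continuous actions store one float per action dimension; discrete actions store a single action index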
            new DataBuffer.DataInfo("Action", typeof(float), new int[] { brainParameters.vectorActionSpaceType == SpaceType.continuous ? brainParameters.vectorActionSize[0] : 1 })
        };

        if (brainParameters.vectorObservationSize > 0)
        {
            allBufferData.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float), new int[] { brainParameters.vectorObservationSize * brainParameters.numStackedVectorObservations }));
        }

        for (int i = 0; i < brainParameters.cameraResolutions.Length; ++i)
        {
            int width  = brainParameters.cameraResolutions[i].width;
            int height = brainParameters.cameraResolutions[i].height;
            int channels = brainParameters.cameraResolutions[i].blackAndWhite ? 1 : 3;

            allBufferData.Add(new DataBuffer.DataInfo("VisualObservation" + i, typeof(float), new int[] { height, width, channels }));
        }
        allBufferData.Add(new DataBuffer.DataInfo("Reward", typeof(float), new int[] { 1 }));

        dataBuffer = new DataBuffer(parametersMimic.maxBufferSize, allBufferData.ToArray());

        if (continueFromCheckpoint)
        {
            LoadModel();
        }
        if (loadTrainingDataFromCheckpoint)
        {
            LoadTrainingData();
        }
    }
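Each demonstration step is then stored as one row per entry in this buffer. A hypothetical append call follows; AddData and its name/array pairs are assumptions for illustration, and the project's actual DataBuffer API may differ:

    //hypothetical usage: the names must match the DataInfo entries declared above
    dataBuffer.AddData(("Action", actionArray), ("VectorObservation", obsArray), ("Reward", rewardArray));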
    public override void InitializeInner(BrainParameters brainParameters, Tensor inputStateTensor, List <Tensor> inputVisualTensors, TrainerParams trainerParams)
    {
        //build the network
        var    networkOutputs = network.BuildNetwork(inputStateTensor, inputVisualTensors, null, ActionSize, ActionSpace);
        Tensor outputAction   = networkOutputs.Item1;
        Tensor outputVar      = networkOutputs.Item2;
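        //the variance head is optional and only meaningful for continuous action spaces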

        hasVariance = outputVar != null && brainParameters.vectorActionSpaceType == SpaceType.continuous;

        List <Tensor> observationInputs = new List <Tensor>();

        if (HasVectorObservation)
        {
            observationInputs.Add(inputStateTensor);
        }
        if (HasVisualObservation)
        {
            observationInputs.AddRange(inputVisualTensors);
        }
        if (hasVariance)
        {
            ActionFunction = K.function(observationInputs, new List <Tensor> {
                outputAction, outputVar
            }, null, "ActionFunction");
        }
        else
        {
            ActionFunction = K.function(observationInputs, new List <Tensor> {
                outputAction
            }, null, "ActionFunction");
        }

        //build the parts for training
        TrainerParamsMimic trainingParams = trainerParams as TrainerParamsMimic;

        if (trainerParams != null && trainingParams == null)
        {
            Debug.LogError("Trainer params for supervised learning mode need to be of type TrainerParamsMimic");
        }
        if (trainingParams != null)
        {
            //training inputs
            var inputActionLabel = UnityTFUtils.Input(new int?[] { ActionSpace == SpaceType.continuous ? ActionSize : 1 }, name: "InputAction", dtype: ActionSpace == SpaceType.continuous ? DataType.Float : DataType.Int32)[0];
            //create the loss
            Tensor loss = null;
            if (ActionSpace == SpaceType.discrete)
            {
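                //one-hot encode the integer labels, then score them against the
                //network's action probabilities with categorical cross-entropy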
                Tensor actionOnehot   = K.one_hot(inputActionLabel, K.constant(ActionSize, dtype: DataType.Int32), K.constant(1.0f), K.constant(0.0f));
                Tensor reshapedOnehot = K.reshape(actionOnehot, new int[] { -1, ActionSize });
                loss = K.mean(K.categorical_crossentropy(reshapedOnehot, outputAction, false));
            }
            else
            {
                if (hasVariance)
                {
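                    //Gaussian negative log-likelihood (constant term dropped):
                    //0.5 * (label - mean)^2 / variance + 0.5 * log(variance)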
                    loss = K.mean(K.mean(0.5 * K.square(inputActionLabel - outputAction) / outputVar + 0.5 * K.log(outputVar)));
                }
                else
                {
                    loss = K.mean(new MeanSquareError().Call(inputActionLabel, outputAction));
                }
            }
            //add inputs, outputs and parameters to the list
            List <Tensor> updateParameters = network.GetWeights();
            List <Tensor> allInputs        = new List <Tensor>();


            if (HasVectorObservation)
            {
                allInputs.Add(inputStateTensor);
                observationInputs.Add(inputStateTensor);
            }
            if (HasVisualObservation)
            {
                allInputs.AddRange(inputVisualTensors);
                observationInputs.AddRange(inputVisualTensors);
            }
            allInputs.Add(inputActionLabel);

            //create optimizer and create necessary functions
            var updates = AddOptimizer(updateParameters, loss, optimizer);
            UpdateFunction = K.function(allInputs, new List <Tensor> {
                loss
            }, updates, "UpdateFunction");
        }
    }
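Both ActionFunction and UpdateFunction behave like Keras backend functions: a flat list of input arrays goes in, a list of output arrays comes out. A hypothetical call site follows; the Call method and return handling are assumptions about this project's K.function wrapper, not confirmed by the source:

    //hypothetical invocation: run one supervised update and read back the scalar loss
    var fetches = UpdateFunction.Call(new List<Array> { vectorObsBatch, actionLabelBatch });
    float lossValue = (float)fetches[0].GetValue(0);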
Example #4
    protected void InitializeSLStructureContinuousAction(Tensor vectorObs, Tensor normalizedVectorObs, List <Tensor> visualObs, TrainerParams trainerParams)
    {
        //build the network
        Tensor outputValue = null; Tensor outputActionMean = null; Tensor outputLogVariance = null;

        network.BuildNetworkForContinuousActionSapce(normalizedVectorObs, visualObs, null, null, ActionSizes[0], out outputActionMean, out outputValue, out outputLogVariance);
        Tensor outputAction = outputActionMean;

        SLHasVar = outputLogVariance != null;
        //only exponentiate when the network actually has a log-variance head
        Tensor outputVar = SLHasVar ? K.exp(outputLogVariance) : null;

        List <Tensor> observationInputs = new List <Tensor>();

        if (HasVectorObservation)
        {
            observationInputs.Add(vectorObs);
        }
        if (HasVisualObservation)
        {
            observationInputs.AddRange(visualObs);
        }
        if (SLHasVar)
        {
            ActionFunction = K.function(observationInputs, new List <Tensor> {
                outputAction, outputVar
            }, null, "ActionFunction");
        }
        else
        {
            ActionFunction = K.function(observationInputs, new List <Tensor> {
                outputAction
            }, null, "ActionFunction");
        }

        //build the parts for training
        TrainerParamsMimic trainingParams = trainerParams as TrainerParamsMimic;

        if (trainerParams != null && trainingParams == null)
        {
            Debug.LogError("Trainer params for supervised learning mode need to be of type TrainerParamsMimic");
        }
        if (trainingParams != null)
        {
            //training inputs
            var inputActionLabel = UnityTFUtils.Input(new int?[] { ActionSizes[0] }, name: "InputAction", dtype: DataType.Float)[0];
            //create the loss
            Tensor loss = null;
            if (SLHasVar)
            {
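                //same Gaussian negative log-likelihood as in InitializeInner, using the
                //log-variance output directly instead of recomputing K.log(outputVar)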
                loss = K.mean(K.mean(0.5 * K.square(inputActionLabel - outputAction) / outputVar + 0.5 * outputLogVariance));
            }
            else
            {
                loss = K.mean(new MeanSquareError().Call(inputActionLabel, outputAction));
            }

            //add inputs, outputs and parameters to the list
            List <Tensor> updateParameters = network.GetActorWeights();
            List <Tensor> allInputs        = new List <Tensor>();
            allInputs.AddRange(observationInputs);
            allInputs.Add(inputActionLabel);

            //create optimizer and create necessary functions
            var updates = AddOptimizer(updateParameters, loss, optimizer);
            UpdateSLFunction = K.function(allInputs, new List <Tensor> {
                loss
            }, updates, "UpdateFunction");
        }
    }
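Note that this loss is identical to the hasVariance branch of InitializeInner: since outputVar = K.exp(outputLogVariance), the term 0.5 * outputLogVariance equals 0.5 * K.log(outputVar), so using the log-variance directly just avoids a redundant log(exp(.)). Also, only network.GetActorWeights() is passed to the optimizer here, so a critic sharing the same network is untouched by the supervised update.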
Example #5
    protected void InitializeSLStructureDiscreteAction(Tensor vectorObs, Tensor normalizedVectorObs, List <Tensor> visualObs, TrainerParams trainerParams)
    {
        //all inputs list
        List <Tensor> allObservationInputs = new List <Tensor>();

        if (HasVectorObservation)
        {
            allObservationInputs.Add(vectorObs);
        }
        if (HasVisualObservation)
        {
            allObservationInputs.AddRange(visualObs);
        }

        //build basic network
        Tensor[] outputActionsLogits = null;
        Tensor   outputValue         = null;

        network.BuildNetworkForDiscreteActionSpace(normalizedVectorObs, visualObs, null, null, ActionSizes, out outputActionsLogits, out outputValue);

        //the action masks input placeholders
        List <Tensor> actionMasksInputs = new List <Tensor>();

        for (int i = 0; i < ActionSizes.Length; ++i)
        {
            actionMasksInputs.Add(UnityTFUtils.Input(new int?[] { ActionSizes[i] }, name: "ActionMask" + i)[0]);
        }
        //apply the masks and normalization to get the final action tensors
        Tensor[] outputActions, outputNormalizedLogits;
        CreateDiscreteActionMaskingLayer(outputActionsLogits, actionMasksInputs.ToArray(), out outputActions, out outputNormalizedLogits);

        //output tensors for discrete actions; includes the selected action from every branch
        var outputDiscreteActions = new List <Tensor>();

        outputDiscreteActions.Add(K.identity(K.cast(ActionSizes.Length == 1 ? outputActions[0] : K.concat(outputActions.ToList(), 1), DataType.Float), "OutputAction"));
        var actionFunctionInputs = new List <Tensor>();

        actionFunctionInputs.AddRange(allObservationInputs);
        actionFunctionInputs.AddRange(actionMasksInputs);
        ActionFunction = K.function(actionFunctionInputs, outputDiscreteActions, null, "ActionFunction");


        //build the parts for training
        TrainerParamsMimic trainingParams = trainerParams as TrainerParamsMimic;

        if (trainerParams != null && trainingParams == null)
        {
            Debug.LogError("Trainer params for supervised learning mode need to be of type TrainerParamsMimic");
        }
        if (trainingParams != null)
        {
            //training inputs
            var inputActionLabels = UnityTFUtils.Input(new int?[] { ActionSizes.Length }, name: "InputAction", dtype: DataType.Int32)[0];
            //split the input for each discrete branch
            List <Tensor> inputActionsDiscreteSeparated = null, onehotInputActions = null;    //for discrete action space
            var           splits = new int[ActionSizes.Length];
            for (int i = 0; i < splits.Length; ++i)
            {
                splits[i] = 1;
            }
            inputActionsDiscreteSeparated = K.split(inputActionLabels, K.constant(splits, dtype: DataType.Int32), K.constant(1, dtype: DataType.Int32), ActionSizes.Length);

            //create the loss
            onehotInputActions = inputActionsDiscreteSeparated.Select((x, i) => K.reshape(K.one_hot(x, K.constant <int>(ActionSizes[i], dtype: DataType.Int32), K.constant(1.0f), K.constant(0.0f)), new int[] { -1, ActionSizes[i] })).ToList();

            var    losses = onehotInputActions.Select((x, i) => K.mean(K.categorical_crossentropy(x, outputNormalizedLogits[i], true))).ToList();
            Tensor loss = losses.Aggregate((x, s) => x + s);

            //add inputs, outputs and parameters to the list
            List <Tensor> updateParameters = network.GetActorWeights();
            List <Tensor> allInputs = new List <Tensor>();
            allInputs.AddRange(actionFunctionInputs);
            allInputs.Add(inputActionLabels);

            //create optimizer and create necessary functions
            var updates = AddOptimizer(updateParameters, loss, optimizer);
            UpdateSLFunction = K.function(allInputs, new List <Tensor> {
                loss
            }, updates, "UpdateFunction");
        }
    }
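As a concrete illustration of the branched handling above (not from the original source): with ActionSizes = {3, 2}, inputActionLabels has shape [batch, 2]; K.split cuts it into two [batch, 1] columns, and each column is one-hot encoded against its own branch size before the per-branch cross-entropies are summed:

    //self-contained sketch of the per-branch one-hot targets for one label row
    int[] actionSizes = { 3, 2 };
    int[] labelRow    = { 2, 0 };   //branch 0 chose action 2, branch 1 chose action 0
    for (int branch = 0; branch < actionSizes.Length; ++branch)
    {
        float[] onehot = new float[actionSizes[branch]];
        onehot[labelRow[branch]] = 1.0f;   //branch 0 -> {0, 0, 1}, branch 1 -> {1, 0}
        //each one-hot row is the target for that branch's categorical cross-entropy
    }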