static void AddSLGO()
{
    var obj1 = new GameObject("LearningModel_SL");
    obj1.AddComponent<SupervisedLearningModel>();
    var obj2 = new GameObject("Trainer_SL");
    obj2.AddComponent<TrainerMimic>();
    var obj3 = new GameObject("SL_Learning");
    obj1.transform.parent = obj3.transform;
    obj2.transform.parent = obj3.transform;

    // try to create parameter assets
    SupervisedLearningNetworkSimple network = null;
    TrainerParamsMimic trainerParam = null;
    CreateAssets<TrainerParamsMimic, SupervisedLearningNetworkSimple>(
        "TrainerParamSL_" + obj1.scene.name + ".asset",
        "NetworkSL_" + obj1.scene.name + ".asset",
        out trainerParam, out network);
    network.hiddenLayers = new List<UnityNetwork.SimpleDenseLayerDef>();
    network.hiddenLayers.Add(new UnityNetwork.SimpleDenseLayerDef());

    var trainer = obj2.GetComponent<TrainerMimic>();
    trainer.modelRef = obj1.GetComponent<SupervisedLearningModel>();
    trainer.parameters = trainerParam;
    trainer.checkpointPath = checkpointPath;
    trainer.checkpointFileName = "Checkpoint_" + obj1.scene.name + ".bytes";
    trainer.trainingDataSaveFileName = "Collected_SL_Data_" + obj1.scene.name + ".bytes";
    ((SupervisedLearningModel)trainer.modelRef).network = network;
}
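For context, a helper like this is normally invoked from the Unity editor. Below is a minimal sketch of how AddSLGO might be exposed as a menu command; the class name and menu path are illustrative, not from the original source, and it assumes AddSLGO is accessible from this editor class:

using UnityEditor;

// Hypothetical editor wrapper; SLSetupMenu and the menu path are assumptions.
public static class SLSetupMenu
{
    [MenuItem("GameObject/ML/Create SL Training Setup")]  // illustrative path
    private static void CreateSLTrainingSetup()
    {
        // Builds the LearningModel_SL / Trainer_SL / SL_Learning hierarchy
        // defined by the helper above.
        AddSLGO();
    }
}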
public override void Initialize()
{
    modelSL = modelRef as ISupervisedLearningModel;
    Debug.Assert(modelSL != null, "Please assign an ISupervisedLearningModel to modelRef");
    Debug.Assert(BrainToTrain != null, "Brain cannot be null");
    parametersMimic = parameters as TrainerParamsMimic;
    Debug.Assert(parametersMimic != null, "Please specify TrainerParamsMimic trainer parameters");

    stats = new StatsLogger();
    modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);
    var brainParameters = BrainToTrain.brainParameters;

    // initialize the data buffer
    Debug.Assert(brainParameters.vectorActionSize.Length <= 1, "Action branching is not supported yet");
    List<DataBuffer.DataInfo> allBufferData = new List<DataBuffer.DataInfo>() {
        new DataBuffer.DataInfo("Action", typeof(float), new int[] {
            brainParameters.vectorActionSpaceType == SpaceType.continuous ? brainParameters.vectorActionSize[0] : 1
        })
    };

    if (brainParameters.vectorObservationSize > 0)
    {
        allBufferData.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float),
            new int[] { brainParameters.vectorObservationSize * brainParameters.numStackedVectorObservations }));
    }

    for (int i = 0; i < brainParameters.cameraResolutions.Length; ++i)
    {
        int width = brainParameters.cameraResolutions[i].width;
        int height = brainParameters.cameraResolutions[i].height;
        int channels = brainParameters.cameraResolutions[i].blackAndWhite ? 1 : 3;
        allBufferData.Add(new DataBuffer.DataInfo("VisualObservation" + i, typeof(float),
            new int[] { height, width, channels }));
    }

    allBufferData.Add(new DataBuffer.DataInfo("Reward", typeof(float), new int[] { 1 }));
    dataBuffer = new DataBuffer(parametersMimic.maxBufferSize, allBufferData.ToArray());

    if (continueFromCheckpoint)
    {
        LoadModel();
    }
    if (loadTrainingDataFromCheckpoint)
    {
        LoadTrainingData();
    }
}
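To make the buffer layout concrete, here is a sketch of the channels Initialize() would create for a brain with a continuous action of size 2, a vector observation of size 4 with one stack, and no cameras. The sizes and the maxBufferSize value are illustrative assumptions; the constructor call mirrors the one above:

// Illustrative only: equivalent layout for vectorActionSize = [2] (continuous),
// vectorObservationSize = 4, numStackedVectorObservations = 1, no cameras.
var infos = new DataBuffer.DataInfo[] {
    new DataBuffer.DataInfo("Action", typeof(float), new int[] { 2 }),
    new DataBuffer.DataInfo("VectorObservation", typeof(float), new int[] { 4 * 1 }),
    new DataBuffer.DataInfo("Reward", typeof(float), new int[] { 1 })
};
var exampleBuffer = new DataBuffer(50000 /* stands in for parametersMimic.maxBufferSize */, infos);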
public override void InitializeInner(BrainParameters brainParameters, Tensor inputStateTensor,
    List<Tensor> inputVisualTensors, TrainerParams trainerParams)
{
    // build the network
    var networkOutputs = network.BuildNetwork(inputStateTensor, inputVisualTensors, null, ActionSize, ActionSpace);
    Tensor outputAction = networkOutputs.Item1;
    Tensor outputVar = networkOutputs.Item2;
    hasVariance = outputVar != null && brainParameters.vectorActionSpaceType == SpaceType.continuous;

    List<Tensor> observationInputs = new List<Tensor>();
    if (HasVectorObservation)
    {
        observationInputs.Add(inputStateTensor);
    }
    if (HasVisualObservation)
    {
        observationInputs.AddRange(inputVisualTensors);
    }
    if (hasVariance)
    {
        ActionFunction = K.function(observationInputs, new List<Tensor> { outputAction, outputVar }, null, "ActionFunction");
    }
    else
    {
        ActionFunction = K.function(observationInputs, new List<Tensor> { outputAction }, null, "ActionFunction");
    }

    // build the parts for training
    TrainerParamsMimic trainingParams = trainerParams as TrainerParamsMimic;
    if (trainerParams != null && trainingParams == null)
    {
        Debug.LogError("Trainer params for supervised learning mode need to be of type TrainerParamsMimic");
    }
    if (trainingParams != null)
    {
        // training inputs
        var inputActionLabel = UnityTFUtils.Input(new int?[] { ActionSpace == SpaceType.continuous ? ActionSize : 1 },
            name: "InputAction", dtype: ActionSpace == SpaceType.continuous ? DataType.Float : DataType.Int32)[0];

        // create the loss
        Tensor loss = null;
        if (ActionSpace == SpaceType.discrete)
        {
            Tensor actionOnehot = K.one_hot(inputActionLabel, K.constant(ActionSize, dtype: DataType.Int32),
                K.constant(1.0f), K.constant(0.0f));
            Tensor reshapedOnehot = K.reshape(actionOnehot, new int[] { -1, ActionSize });
            loss = K.mean(K.categorical_crossentropy(reshapedOnehot, outputAction, false));
        }
        else if (hasVariance)
        {
            // Gaussian negative log likelihood (up to an additive constant)
            loss = K.mean(K.mean(0.5 * K.square(inputActionLabel - outputAction) / outputVar + 0.5 * K.log(outputVar)));
        }
        else
        {
            loss = K.mean(new MeanSquareError().Call(inputActionLabel, outputAction));
        }

        // add inputs, outputs and parameters to the list
        List<Tensor> updateParameters = network.GetWeights();
        List<Tensor> allInputs = new List<Tensor>();
        if (HasVectorObservation)
        {
            allInputs.Add(inputStateTensor);
        }
        if (HasVisualObservation)
        {
            allInputs.AddRange(inputVisualTensors);
        }
        allInputs.Add(inputActionLabel);

        // create the optimizer and the training function
        var updates = AddOptimizer(updateParameters, loss, optimizer);
        UpdateFunction = K.function(allInputs, new List<Tensor> { loss }, updates, "UpdateFunction");
    }
}
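The continuous-action loss used when hasVariance is true is the Gaussian negative log-likelihood of the demonstrated action under the network's predicted distribution, up to an additive constant:

$$\mathcal{L} = \mathbb{E}\!\left[\frac{(a_{\text{label}} - \mu)^2}{2\sigma^2} + \frac{1}{2}\log \sigma^2\right]$$

where $\mu$ is outputAction and $\sigma^2$ is outputVar. Minimizing it fits both the mean and the variance of the expert's actions, rather than just the mean as plain mean-squared error would.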
protected void InitializeSLStructureContinuousAction(Tensor vectorObs, Tensor normalizedVectorObs,
    List<Tensor> visualObs, TrainerParams trainerParams)
{
    // build the network
    Tensor outputValue = null;
    Tensor outputActionMean = null;
    Tensor outputLogVariance = null;
    network.BuildNetworkForContinuousActionSapce(normalizedVectorObs, visualObs, null, null, ActionSizes[0],
        out outputActionMean, out outputValue, out outputLogVariance);
    Tensor outputAction = outputActionMean;
    SLHasVar = outputLogVariance != null;
    Tensor outputVar = SLHasVar ? K.exp(outputLogVariance) : null;

    List<Tensor> observationInputs = new List<Tensor>();
    if (HasVectorObservation)
    {
        observationInputs.Add(vectorObs);
    }
    if (HasVisualObservation)
    {
        observationInputs.AddRange(visualObs);
    }
    if (SLHasVar)
    {
        ActionFunction = K.function(observationInputs, new List<Tensor> { outputAction, outputVar }, null, "ActionFunction");
    }
    else
    {
        ActionFunction = K.function(observationInputs, new List<Tensor> { outputAction }, null, "ActionFunction");
    }

    // build the parts for training
    TrainerParamsMimic trainingParams = trainerParams as TrainerParamsMimic;
    if (trainerParams != null && trainingParams == null)
    {
        Debug.LogError("Trainer params for supervised learning mode need to be of type TrainerParamsMimic");
    }
    if (trainingParams != null)
    {
        // training inputs
        var inputActionLabel = UnityTFUtils.Input(new int?[] { ActionSizes[0] }, name: "InputAction", dtype: DataType.Float)[0];

        // create the loss
        Tensor loss = null;
        if (SLHasVar)
        {
            loss = K.mean(K.mean(0.5 * K.square(inputActionLabel - outputAction) / outputVar + 0.5 * outputLogVariance));
        }
        else
        {
            loss = K.mean(new MeanSquareError().Call(inputActionLabel, outputAction));
        }

        // add inputs, outputs and parameters to the list
        List<Tensor> updateParameters = network.GetActorWeights();
        List<Tensor> allInputs = new List<Tensor>();
        allInputs.AddRange(observationInputs);
        allInputs.Add(inputActionLabel);

        // create the optimizer and the training function
        var updates = AddOptimizer(updateParameters, loss, optimizer);
        UpdateSLFunction = K.function(allInputs, new List<Tensor> { loss }, updates, "UpdateFunction");
    }
}
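This is the same Gaussian negative log-likelihood as in InitializeInner: since outputVar = K.exp(outputLogVariance), the term 0.5 * outputLogVariance equals 0.5 * K.log(outputVar), so the two formulations are mathematically identical. Working with the log variance directly simply avoids taking the log of an exp, and guarding the K.exp call behind SLHasVar (as done above) avoids operating on a null tensor when the network produces no variance head.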
protected void InitializeSLStructureDiscreteAction(Tensor vectorObs, Tensor normalizedVectorObs,
    List<Tensor> visualObs, TrainerParams trainerParams)
{
    // collect all observation inputs
    List<Tensor> allObservationInputs = new List<Tensor>();
    if (HasVectorObservation)
    {
        allObservationInputs.Add(vectorObs);
    }
    if (HasVisualObservation)
    {
        allObservationInputs.AddRange(visualObs);
    }

    // build the basic network
    Tensor[] outputActionsLogits = null;
    Tensor outputValue = null;
    network.BuildNetworkForDiscreteActionSpace(normalizedVectorObs, visualObs, null, null, ActionSizes,
        out outputActionsLogits, out outputValue);

    // the action mask input placeholders, one per action branch
    List<Tensor> actionMasksInputs = new List<Tensor>();
    for (int i = 0; i < ActionSizes.Length; ++i)
    {
        actionMasksInputs.Add(UnityTFUtils.Input(new int?[] { ActionSizes[i] }, name: "AcionMask" + i)[0]);
    }

    // apply masking and normalization to get the final action tensors
    Tensor[] outputActions, outputNormalizedLogits;
    CreateDiscreteActionMaskingLayer(outputActionsLogits, actionMasksInputs.ToArray(), out outputActions, out outputNormalizedLogits);

    // output tensors for discrete actions; includes the selected action of every branch
    var outputDiscreteActions = new List<Tensor>();
    outputDiscreteActions.Add(K.identity(
        K.cast(ActionSizes.Length == 1 ? outputActions[0] : K.concat(outputActions.ToList(), 1), DataType.Float),
        "OutputAction"));
    var actionFunctionInputs = new List<Tensor>();
    actionFunctionInputs.AddRange(allObservationInputs);
    actionFunctionInputs.AddRange(actionMasksInputs);
    ActionFunction = K.function(actionFunctionInputs, outputDiscreteActions, null, "ActionFunction");

    // build the parts for training
    TrainerParamsMimic trainingParams = trainerParams as TrainerParamsMimic;
    if (trainerParams != null && trainingParams == null)
    {
        Debug.LogError("Trainer params for supervised learning mode need to be of type TrainerParamsMimic");
    }
    if (trainingParams != null)
    {
        // training inputs: one integer label per action branch
        var inputActionLabels = UnityTFUtils.Input(new int?[] { ActionSizes.Length }, name: "InputAction", dtype: DataType.Int32)[0];

        // split the input into one tensor per discrete branch
        List<Tensor> inputActionsDiscreteSeperated = null, onehotInputActions = null;
        var splits = new int[ActionSizes.Length];
        for (int i = 0; i < splits.Length; ++i)
        {
            splits[i] = 1;
        }
        inputActionsDiscreteSeperated = K.split(inputActionLabels, K.constant(splits, dtype: DataType.Int32),
            K.constant(1, dtype: DataType.Int32), ActionSizes.Length);

        // create the loss: categorical cross entropy summed over all branches
        onehotInputActions = inputActionsDiscreteSeperated.Select((x, i) =>
            K.reshape(K.one_hot(x, K.constant<int>(ActionSizes[i], dtype: DataType.Int32),
                K.constant(1.0f), K.constant(0.0f)), new int[] { -1, ActionSizes[i] })).ToList();
        var losses = onehotInputActions.Select((x, i) =>
            K.mean(K.categorical_crossentropy(x, outputNormalizedLogits[i], true))).ToList();
        Tensor loss = losses.Aggregate((x, s) => x + s);

        // add inputs, outputs and parameters to the list
        List<Tensor> updateParameters = network.GetActorWeights();
        List<Tensor> allInputs = new List<Tensor>();
        allInputs.AddRange(actionFunctionInputs);
        allInputs.Add(inputActionLabels);

        // create the optimizer and the training function
        var updates = AddOptimizer(updateParameters, loss, optimizer);
        UpdateSLFunction = K.function(allInputs, new List<Tensor> { loss }, updates, "UpdateFunction");
    }
}
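For the discrete case, the training objective is the categorical cross-entropy between the one-hot demonstration labels and the masked, normalized logits, summed over all action branches:

$$\mathcal{L} = \sum_{b=1}^{B} \mathbb{E}\!\left[-\log p_b\!\left(a^{\text{label}}_b\right)\right]$$

where $B$ is ActionSizes.Length and $p_b$ is the softmax distribution of branch $b$. The true flag passed to K.categorical_crossentropy here presumably marks outputNormalizedLogits as raw logits, in contrast to the false passed in InitializeInner, where outputAction already holds probabilities.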