/// <summary>
/// Creates a DQN from an actor network and a critic network.
/// The training batch size is taken from <paramref name="actor"/> so that a single value is used for learning.
/// </summary>
/// <param name="actor">Network stored as the actor. Its <c>BatchSize</c>, cast to <c>int</c>, becomes the batch size. Must not be null.</param>
/// <param name="critic">Network stored as the critic. Not validated here.</param>
/// <param name="actionsNum">Value stored as the number of actions.</param>
/// <param name="featuresNum">Value stored as the number of features.</param>
/// <param name="actionKeys">Array stored as the action keys (presumably one key per action; not validated here).</param>
/// <exception cref="System.ArgumentNullException"><paramref name="actor"/> is null.</exception>
public DQN(ISupportNet actor, ISupportNet critic, int actionsNum, int featuresNum, int[] actionKeys)
{
    // The batch size is read from the actor below, so a null actor must be rejected
    // explicitly instead of surfacing as an anonymous NullReferenceException.
    if (actor == null)
    {
        throw new System.ArgumentNullException(nameof(actor));
    }

    _actionsNumber = actionsNum;
    _featuresNumber = featuresNum;
    _actionKeys = actionKeys;
    _actorNet = actor;
    _criticNet = critic;

    // Use one common learning batch size: the actor's.
    _batchSize = (int)actor.BatchSize;
}
/// <summary>
/// Builds a <see cref="DQN"/> from serialized actor and critic networks.
/// According to the original note, a new environment object should be assigned to <c>DQN.Env</c>
/// before training; no environment is needed when the model is only applied.
/// </summary>
/// <remarks>
/// No concrete network loader is currently enabled (the FCN7 branch is commented out), so the
/// buffers, <paramref name="innerTypeName"/> and <paramref name="deviceName"/> are not consumed and
/// this method throws <see cref="System.NotSupportedException"/> for every input.
/// </remarks>
/// <param name="actorBuffer">Serialized actor network.</param>
/// <param name="criticBuffer">Serialized critic network.</param>
/// <param name="actionsNum">Forwarded to the <see cref="DQN"/> constructor.</param>
/// <param name="featuresNum">Forwarded to the <see cref="DQN"/> constructor.</param>
/// <param name="actionKeys">Forwarded to the <see cref="DQN"/> constructor.</param>
/// <param name="innerTypeName">Type name of the network implementation to restore.</param>
/// <param name="deviceName">Name of the device the networks should be loaded on.</param>
/// <returns>A DQN wrapping the restored actor and critic networks.</returns>
/// <exception cref="System.NotSupportedException">No loader produced an actor network for <paramref name="innerTypeName"/>.</exception>
public static DQN Load(byte[] actorBuffer, byte[] criticBuffer, int actionsNum, int featuresNum, int[] actionKeys, string innerTypeName, string deviceName)
{
    ISupportNet actor = null, critic = null;

    // Disabled loader kept as a hint for how a concrete network type is meant to be restored.
    //if (innerTypeName == typeof(FCN7).Name)
    //{
    //    actor = FCN7.Load(actorBuffer, deviceName);
    //    critic = FCN7.Load(criticBuffer, deviceName);
    //}

    // The constructor reads actor.BatchSize; without a restored actor it would fail with a
    // NullReferenceException that hides the real cause, so fail here with a clear message.
    if (actor == null)
    {
        throw new System.NotSupportedException(
            "No network loader is available for inner type '" + innerTypeName + "'.");
    }

    return new DQN(actor, critic, actionsNum, featuresNum, actionKeys);
}
/// <summary>
/// Replaces this network's model with a copy of <paramref name="sourceNet"/> and rebuilds the trainer.
/// The source is serialized with <c>PersistenceMemory()</c>, reloaded on <c>device</c>, and a
/// squared-error loss with a momentum SGD learner is attached to the reloaded function.
/// </summary>
/// <param name="sourceNet">Network whose serialized model is copied into this instance.</param>
public void Accept(ISupportNet sourceNet)
{
    // Schedule values used by the learner below.
    const double learningRate = 0.00178125;
    const double momentumTimeConstantValue = 256;

    // Serialize the source network and reload it as this instance's model.
    byte[] serializedModel = sourceNet.PersistenceMemory();
    classifierOutput = Function.Load(serializedModel, device);

    // Rebind the input by name and create a float label variable shaped like the model output.
    inputVariable = classifierOutput.Inputs.First(candidate => candidate.Name == "inputVariable");
    outputVariable = Variable.InputVariable(classifierOutput.Output.Shape, DataType.Float, "labelVariable");

    // Loss and evaluation criterion are two separate squared-error nodes over the same pair.
    var lossFunction = CNTKLib.SquaredError(classifierOutput, outputVariable);
    var evaluationFunction = CNTKLib.SquaredError(classifierOutput, outputVariable);

    // Learning-rate schedule with second argument 1 (per-sample, judging by the original local's name - confirm).
    var learningRateSchedule = new TrainingParameterScheduleDouble(learningRate, 1);
    var momentumSchedule = CNTKLib.MomentumAsTimeConstantSchedule(momentumTimeConstantValue);

    // Momentum SGD over all model parameters, with the unit-gain flag set to true.
    var momentumLearner = Learner.MomentumSGDLearner(
        classifierOutput.Parameters(),
        learningRateSchedule,
        momentumSchedule,
        true);
    IList<Learner> learners = new List<Learner> { momentumLearner };

    trainer = Trainer.CreateTrainer(classifierOutput, lossFunction, evaluationFunction, learners);
}