Example #1
        /// <summary>
        /// Creates a new NeuralNetworkTrainer.
        /// </summary>
        /// <param name="nn">NeuralNetwork to train.</param>
        /// <param name="inputDat">Input data.</param>
        /// <param name="targetDat">Target data.</param>
        /// <param name="maxUnrollLen">Memory state unroll times, for recurring layers.</param>
        /// <param name="losType">Loss calculation type, NeuralNetworkTrainer.LOSS_TYPE_AVERAGE/MAX/CROSSENTROPY.</param>
        public NeuralNetworkTrainer(NeuralNetwork nn, float[][] inputDat, float[][] targetDat, int maxUnrollLen, int losType)
        {
            neuralNetwork = nn;
            inputData     = inputDat;
            targetData    = targetDat;

            maxUnrollLength = maxUnrollLen;
            if (maxUnrollLength < 1)
            {
                maxUnrollLength = 1;
            }

            lossType = losType;

            //check for a recurrent layer to decide whether contexts must be stacked and unrolled
            if (nn.outputLayer.recurring)
            {
                hasRecurring = true;
            }
            else
            {
                for (int i = 0; i < nn.hiddenLayers.Length; i++)
                {
                    if (nn.hiddenLayers[i].recurring)
                    {
                        hasRecurring = true;
                        break;
                    }
                }
            }

            derivatives.Setup(nn);
            adagradMemory.Setup(nn);
            adagradMemory.Reset();

            int unrollLen = maxUnrollLength;

            if (!hasRecurring)
            {
                unrollLen = 1;
            }

            stackedRuntimeContext   = new NeuralNetworkContext[unrollLen];
            stackedFullContext      = new NeuralNetworkFullContext[unrollLen];
            stackedDerivativeMemory = new NeuralNetworkPropagationState[unrollLen];
            for (int i = 0; i < stackedRuntimeContext.Length; i++)
            {
                stackedRuntimeContext[i] = new NeuralNetworkContext();
                stackedRuntimeContext[i].Setup(nn);

                stackedFullContext[i] = new NeuralNetworkFullContext();
                stackedFullContext[i].Setup(nn);

                stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
                stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
            }
        }
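
A minimal usage sketch (assumptions: a NeuralNetwork instance `net` built elsewhere; the data values and unroll length are illustrative only):

        // Train with averaged absolute loss; the stacked contexts collapse to a
        // single entry when the network has no recurrent layers.
        float[][] inputs  = { new float[] { 0f, 1f }, new float[] { 1f, 0f } };
        float[][] targets = { new float[] { 1f }, new float[] { 0f } };
        var trainer = new NeuralNetworkTrainer(net, inputs, targets, 8, NeuralNetworkTrainer.LOSS_TYPE_AVERAGE);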
Example #2
        /// <summary>
        /// Creates a new NeuralNetworkQLearning system.
        /// </summary>
        /// <param name="nn">NeuralNetwork to train.</param>
        /// <param name="maxUnrollLen">Number of time steps to unroll the memory state for recurrent layers.</param>
        /// <param name="sessionsFileName">File used to persist learning sessions.</param>
        public NeuralNetworkQLearning(NeuralNetwork nn, int maxUnrollLen, string sessionsFileName)
        {
            neuralNetwork        = nn;
            learningSessionsFile = sessionsFileName;

            maxUnrollLength = maxUnrollLen;
            if (maxUnrollLength < 1)
            {
                maxUnrollLength = 1;
            }

            //check for a recurrent layer to decide whether contexts must be stacked and unrolled
            for (int i = 0; i < nn.hiddenLayers.Length; i++)
            {
                if (nn.hiddenLayers[i].recurring)
                {
                    hasRecurring = true;
                    break;
                }
            }

            derivatives.Setup(nn);
            if (!hasRecurring)
            {
                maxUnrollLength = 1;
            }

            stackedRuntimeContext   = new NeuralNetworkContext[maxUnrollLength];
            stackedFullContext      = new NeuralNetworkFullContext[maxUnrollLength];
            stackedDerivativeMemory = new NeuralNetworkPropagationState[maxUnrollLength];
            for (int i = 0; i < stackedRuntimeContext.Length; i++)
            {
                stackedRuntimeContext[i] = new NeuralNetworkContext();
                stackedRuntimeContext[i].Setup(nn);

                stackedFullContext[i] = new NeuralNetworkFullContext();
                stackedFullContext[i].Setup(nn);

                stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
                stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
            }

        }
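
A minimal usage sketch (assumptions: `net` is a NeuralNetwork built elsewhere; the file name is illustrative):

        // Unroll up to 16 time steps for recurrent layers; learning sessions are
        // persisted to the named file.
        var qLearner = new NeuralNetworkQLearning(net, 16, "qlearning_sessions.dat");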
Example #3
        /// <summary>
        /// Binds this propagation state to a network, its per-step contexts, and the shared derivative memory.
        /// </summary>
        public void Setup(NeuralNetwork nn, NeuralNetworkContext context, NeuralNetworkFullContext fullCtx, NeuralNetworkDerivativeMemory derivMem)
        {
            //initialize memory buffers
            state   = new float[nn.hiddenLayers.Length][];
            weights = new float[nn.hiddenLayers.Length + 1][];
            biases  = new float[nn.hiddenLayers.Length + 1][];

            buf       = new float[nn.hiddenLayers.Length + 1][];
            recurrBuf = new float[nn.hiddenLayers.Length][];

            biasMems         = derivMem.biasMems;
            weightMems       = derivMem.weightMems;
            recurrWeightMems = derivMem.recurrWeightMems;
            recurrWeights    = new float[nn.hiddenLayers.Length][];
            derivativeMemory = derivMem;

            for (int i = 0; i < nn.hiddenLayers.Length; i++)
            {
                state[i]   = new float[nn.hiddenLayers[i].numberOfNeurons];
                weights[i] = nn.hiddenConnections[i].weights;
                biases[i]  = nn.hiddenLayers[i].biases;

                if (i == 0)
                {
                    buf[i] = context.inputData;
                }
                else
                {
                    buf[i] = fullCtx.hiddenBuffer[i - 1];
                }

                if (nn.hiddenLayers[i].recurring)
                {
                    recurrWeights[i] = nn.hiddenRecurringConnections[i].weights;
                    recurrBuf[i]     = fullCtx.hiddenRecurringBuffer[i];
                }
            }

            int lid = nn.hiddenLayers.Length;

            weights[lid] = nn.outputConnection.weights;
            biases[lid]  = nn.outputLayer.biases;
            if (lid > 0)
            {
                buf[lid] = fullCtx.hiddenBuffer[lid - 1];
            }
            else
            {
                buf[lid] = context.inputData;
            }
        }
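
A minimal usage sketch (assumptions: `net`, `ctx`, `fullCtx`, and `derivMem` were all set up against the same network, as in the constructors above):

        // Bind a propagation state to one unroll step's contexts and the shared
        // derivative memory.
        var propState = new NeuralNetworkPropagationState();
        propState.Setup(net, ctx, fullCtx, derivMem);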
Example #4
        /// <summary>
        /// Creates a new NeuralNetworkGenerator.
        /// </summary>
        /// <param name="nn">NeuralNetwork to train.</param>
        /// <param name="maxUnrollLen">Number of time steps to unroll the memory state for recurrent layers.</param>
        public NeuralNetworkGenerator(NeuralNetwork nn, int maxUnrollLen)
        {
            neuralNetwork = nn;

            if (maxUnrollLen < 1)
            {
                maxUnrollLen = 1;
            }
            maxUnrollLength = maxUnrollLen;

            //check for a recurrent layer to decide whether contexts must be stacked and unrolled
            for (int i = 0; i < nn.hiddenLayers.Length; i++)
            {
                if (nn.hiddenLayers[i].recurring)
                {
                    hasRecurring = true;
                    break;
                }
            }

            derivatives.Setup(nn);

            if (hasRecurring)
            {
                recurringBPBuffer = new float[nn.hiddenLayers.Length][];
                for (int i = 0; i < recurringBPBuffer.Length; i++)
                {
                    if (nn.hiddenLayers[i].recurring)
                    {
                        recurringBPBuffer[i] = new float[nn.hiddenLayers[i].numberOfNeurons];
                    }
                }
            }
            else
            {
                maxUnrollLen = 1;
            }

            stackedRuntimeContext   = new NeuralNetworkContext[maxUnrollLen];
            stackedFullContext      = new NeuralNetworkFullContext[maxUnrollLen];
            stackedDerivativeMemory = new NeuralNetworkPropagationState[maxUnrollLen];
            for (int i = 0; i < maxUnrollLen; i++)
            {
                stackedRuntimeContext[i] = new NeuralNetworkContext();
                stackedRuntimeContext[i].Setup(nn);

                stackedFullContext[i] = new NeuralNetworkFullContext();
                stackedFullContext[i].Setup(nn);

                stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
                stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
                stackedDerivativeMemory[i].inputMem = new float[nn.inputLayer.numberOfNeurons];
            }
        }
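
A minimal usage sketch (assumption: `net` is a NeuralNetwork built elsewhere):

        // Generator with an 8-step unroll window; the stacked contexts collapse to a
        // single entry when the network has no recurrent layers.
        var generator = new NeuralNetworkGenerator(net, 8);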
Example #5
        /// <summary>
        /// Runs the neural network backwards, calculating derivatives for use in adagrad updates or generation.
        /// </summary>
        /// <param name="target">Expected output values.</param>
        /// <param name="context">Runtime context holding the forward-pass output.</param>
        /// <param name="fullContext">Full context captured during the forward pass.</param>
        /// <param name="propState">Propagation state that receives the computed derivatives and loss.</param>
        /// <param name="lossType">Loss calculation type: NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, LOSS_TYPE_MAX, or LOSS_TYPE_CROSSENTROPY.</param>
        /// <param name="crossEntropyTarget">Index of the target class for cross-entropy loss, or -1 if unused.</param>
        public void ExecuteBackwards(float[] target, NeuralNetworkContext context, NeuralNetworkFullContext fullContext, NeuralNetworkPropagationState propState, int lossType, int crossEntropyTarget)
        {
            //prepare for back propagation
            for (int i = 0; i < propState.state.Length; i++)
            {
                Utils.Fill(propState.state[i], 0.0f);
            }

            //backpropagate output derivatives and accumulate the loss
            int lid = hiddenLayers.Length;

            float lossAvg = 0.0f;

            for (int i = 0; i < target.Length; i++)
            {
                float deriv = context.outputData[i] - target[i];

                if (lossType == NeuralNetworkTrainer.LOSS_TYPE_MAX)
                {
                    float aderiv = Math.Abs(deriv);
                    if (aderiv > lossAvg)
                    {
                        lossAvg = aderiv;
                    }
                }
                else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
                {
                    lossAvg += Math.Abs(deriv);
                }

                backpropagate(lid, i, deriv, propState);
            }

            if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
            {
                lossAvg /= (float)target.Length;
            }
            else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_CROSSENTROPY && crossEntropyTarget != -1)
            {
                //cross-entropy loss, clamped to avoid infinity when the predicted probability is 0
                lossAvg = (float)-Math.Log(context.outputData[crossEntropyTarget]);
                if (float.IsInfinity(lossAvg))
                {
                    lossAvg = 1e8f;
                }
            }

            propState.loss = lossAvg;
            propState.derivativeMemory.SwapBPBuffers();

            int k = lid;

            while (k-- > 0)
            {
                int l = hiddenLayers[k].numberOfNeurons;
                while (l-- > 0)
                {
                    backpropagate(k, l, propState.state[k][l], propState);
                }
            }
        }
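
A minimal usage sketch (assumptions: a forward pass via Execute_FullContext has already filled `ctx` and `fullCtx`, `propState` was bound as in Example #3, and `target` is the expected output vector):

        // Backward pass: accumulates derivatives into propState and records the loss.
        // Passing -1 disables the cross-entropy target index.
        net.ExecuteBackwards(target, ctx, fullCtx, propState, NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, -1);
        float loss = propState.loss;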
Example #6
        /// <summary>
        /// Execute the neural network and save all intermediate results in fullContext for adagrad.
        /// </summary>
        /// <param name="context">Runtime context whose inputData is read and whose outputData is written.</param>
        /// <param name="fullContext">Full context that receives copies of hidden and recurrent activations.</param>
        public void Execute_FullContext(NeuralNetworkContext context, NeuralNetworkFullContext fullContext)
        {
            float[] input  = context.inputData,
                    output = context.outputData,
                    hidden = context.hiddenData;

            float[][] hiddenRecurring = context.hiddenRecurringData;

            int i, weightIndex, recurringWeightIndex;

            NeuronActivationFunction activeFunc;

            if (hiddenLayers.Length > 0)
            {
                int     lastNumNeurons = 0;
                float[] weights, biases, recurringWeights;
                for (i = 0; i < hiddenLayers.Length; i++)
                {
                    weights = hiddenConnections[i].weights;
                    biases  = hiddenLayers[i].biases;

                    activeFunc = hiddenLayers[i].activationFunction;

                    float[] ina;
                    int     alen;
                    if (i == 0)
                    {
                        ina  = input;
                        alen = input.Length;
                    }
                    else
                    {
                        ina  = hidden;
                        alen = lastNumNeurons;
                    }

                    if (hiddenLayers[i].recurring)
                    {
                        //recurring hidden layer
                        float[] hrec = hiddenRecurring[i];

                        recurringWeights = hiddenRecurringConnections[i].weights;

                        //copy over data needed for training
                        Array.Copy(hrec, fullContext.hiddenRecurringBuffer[i], hrec.Length);

                        weightIndex          = 0;
                        recurringWeightIndex = 0;

                        int k = biases.Length;
                        while (k-- > 0)
                        {
                            float ov = biases[k];

                            int j = alen;
                            while (j-- > 0)
                            {
                                ov += ina[j] * weights[weightIndex++];
                            }

                            j = hrec.Length;
                            while (j-- > 0)
                            {
                                ov += hrec[j] * recurringWeights[recurringWeightIndex++];
                            }

                            hidden[k] = activeFunc(ov);
                        }

                        Array.Copy(hidden, hrec, biases.Length);
                    }
                    else
                    {
                        //non-recurring hidden layer
                        weightIndex = 0;

                        int k = biases.Length;
                        while (k-- > 0)
                        {
                            float ov = biases[k];

                            int j = alen;
                            while (j-- > 0)
                            {
                                ov += ina[j] * weights[weightIndex++];
                            }

                            hidden[k] = activeFunc(ov);
                        }
                    }

                    Array.Copy(hidden, fullContext.hiddenBuffer[i], biases.Length);
                    lastNumNeurons = biases.Length;
                }

                activeFunc = outputLayer.activationFunction;

                //last output layer

                //run last hidden layer to output layer connection
                weights = outputConnection.weights;
                biases  = outputLayer.biases;

                weightIndex = 0;

                i = output.Length;
                while (i-- > 0)
                {
                    float ov = biases[i];

                    //connections from the last hidden layer
                    int k = lastNumNeurons;
                    while (k-- > 0)
                    {
                        ov += hidden[k] * weights[weightIndex++];
                    }

                    output[i] = activeFunc(ov);
                }
            }
            else
            {
                activeFunc = outputLayer.activationFunction;

                //run input to output layer connection directly (no hidden layers)
                float[] weights = outputConnection.weights,
                        biases  = outputLayer.biases;

                weightIndex = 0;
                i = output.Length;
                while (i-- > 0)
                {
                    float ov = biases[i];

                    //input connections
                    int k = input.Length;
                    while (k-- > 0)
                    {
                        ov += input[k] * weights[weightIndex++];
                    }

                    output[i] = activeFunc(ov);
                }
            }
        }
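
A minimal usage sketch (assumptions: `net`, `ctx`, and `fullCtx` were set up against the same network, and `inputSample` matches the input layer size):

        // Forward pass that also snapshots hidden and recurrent activations into
        // fullCtx so a later backward pass can consume them.
        Array.Copy(inputSample, ctx.inputData, inputSample.Length);
        net.Execute_FullContext(ctx, fullCtx);
        float[] prediction = ctx.outputData;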