Example #1
        /// <summary>
        /// Create new NeuralNetworkTrainer.
        /// </summary>
        /// <param name="nn">NeuralNetwork to train.</param>
        /// <param name="inputDat">Input data.</param>
        /// <param name="targetDat">Target data.</param>
        /// <param name="maxUnrollLen">Memory state unroll times, for recurring layers.</param>
        /// <param name="losType">Loss calculation type, NeuralNetworkTrainer.LOSS_TYPE_AVERAGE/MAX/CROSSENTROPY.</param>
        public NeuralNetworkTrainer(NeuralNetwork nn, float[][] inputDat, float[][] targetDat, int maxUnrollLen, int losType)
        {
            neuralNetwork = nn;
            inputData     = inputDat;
            targetData    = targetDat;

            maxUnrollLength = maxUnrollLen;
            if (maxUnrollLength < 1)
            {
                maxUnrollLength = 1;
            }

            lossType = losType;

            //check for a recurrent layer; if one is present, contexts must be stacked and unrolled
            if (nn.outputLayer.recurring)
            {
                hasRecurring = true;
            }
            else
            {
                for (int i = 0; i < nn.hiddenLayers.Length; i++)
                {
                    if (nn.hiddenLayers[i].recurring)
                    {
                        hasRecurring = true;
                        break;
                    }
                }
            }

            derivatives.Setup(nn);
            adagradMemory.Setup(nn);
            adagradMemory.Reset();

            int unrollLen = maxUnrollLength;

            if (!hasRecurring)
            {
                unrollLen = 1;
            }

            stackedRuntimeContext   = new NeuralNetworkContext[unrollLen];
            stackedFullContext      = new NeuralNetworkFullContext[unrollLen];
            stackedDerivativeMemory = new NeuralNetworkPropagationState[unrollLen];
            for (int i = 0; i < stackedRuntimeContext.Length; i++)
            {
                stackedRuntimeContext[i] = new NeuralNetworkContext();
                stackedRuntimeContext[i].Setup(nn);

                stackedFullContext[i] = new NeuralNetworkFullContext();
                stackedFullContext[i].Setup(nn);

                stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
                stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
            }
        }
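
A minimal construction sketch follows; it is an illustration, not part of the library. It assumes an already-configured NeuralNetwork named net whose input layer has 2 neurons and whose output layer has 1; only the constructor signature and the loss-type constant are taken from the code above.

        // assumption: net is an already-configured NeuralNetwork with 2 inputs and 1 output
        float[][] inputs  = { new float[] { 0.0f, 1.0f }, new float[] { 1.0f, 0.0f } };
        float[][] targets = { new float[] { 1.0f }, new float[] { 0.0f } };

        // unroll recurrent memory for up to 4 time steps, report the average loss
        NeuralNetworkTrainer trainer = new NeuralNetworkTrainer(
            net, inputs, targets, 4, NeuralNetworkTrainer.LOSS_TYPE_AVERAGE);
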
Example #2
        /// <summary>
        /// Create new NeuralNetworkQLearning system.
        /// </summary>
        /// <param name="nn">NeuralNetwork to train.</param>
        /// <param name="inputDat">Input data.</param>
        /// <param name="targetDat">Target data.</param>
        public NeuralNetworkQLearning(NeuralNetwork nn, int maxUnrollLen, string sessionsFileName)
        {
            neuralNetwork        = nn;
            learningSessionsFile = sessionsFileName;

            maxUnrollLength = maxUnrollLen;
            if (maxUnrollLength < 1)
            {
                maxUnrollLength = 1;
            }

            //check for a recurrent layer; if one is present, contexts must be stacked and unrolled
            for (int i = 0; i < nn.hiddenLayers.Length; i++)
            {
                if (nn.hiddenLayers[i].recurring)
                {
                    hasRecurring = true;
                    break;
                }
            }

            derivatives.Setup(nn);
            if (!hasRecurring)
            {
                maxUnrollLength = 1;
            }

            stackedRuntimeContext   = new NeuralNetworkContext[maxUnrollLength];
            stackedFullContext      = new NeuralNetworkFullContext[maxUnrollLength];
            stackedDerivativeMemory = new NeuralNetworkPropagationState[maxUnrollLength];
            for (int i = 0; i < stackedRuntimeContext.Length; i++)
            {
                stackedRuntimeContext[i] = new NeuralNetworkContext();
                stackedRuntimeContext[i].Setup(nn);

                stackedFullContext[i] = new NeuralNetworkFullContext();
                stackedFullContext[i].Setup(nn);

                stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
                stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
            }

            /*
             * if (hasRecurring)
             * {
             *  recurringMemoryState = new float[nn.hiddenLayers.Length][];
             *  for (int i = 0; i < nn.hiddenLayers.Length; i++)
             *  {
             *      if (nn.hiddenLayers[i].recurring)
             *      {
             *          recurringMemoryState[i] = new float[nn.hiddenLayers[i].numberOfNeurons];
             *      }
             *  }
             * }*/
        }
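
As above, a brief construction sketch (illustrative only); net is assumed to be an already-configured NeuralNetwork, and the constructor shown above only stores the sessions file name.

        // 8 unroll steps for recurrent hidden layers; the file path is an assumed example
        NeuralNetworkQLearning qLearner = new NeuralNetworkQLearning(net, 8, "qlearning_sessions.dat");
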
Example #3
        /// <summary>
        /// Create new NeuralNetworkGenerator.
        /// </summary>
        /// <param name="nn">NeuralNetwork to train.</param>
        public NeuralNetworkGenerator(NeuralNetwork nn, int maxUnrollLen)
        {
            neuralNetwork   = nn;
            maxUnrollLength = maxUnrollLen;

            //check for a recurrent layer; if one is present, contexts must be stacked and unrolled
            for (int i = 0; i < nn.hiddenLayers.Length; i++)
            {
                if (nn.hiddenLayers[i].recurring)
                {
                    hasRecurring = true;
                    break;
                }
            }

            derivatives.Setup(nn);

            if (hasRecurring)
            {
                recurringBPBuffer = new float[nn.hiddenLayers.Length][];
                for (int i = 0; i < recurringBPBuffer.Length; i++)
                {
                    if (nn.hiddenLayers[i].recurring)
                    {
                        recurringBPBuffer[i] = new float[nn.hiddenLayers[i].numberOfNeurons];
                    }
                }
            }
            else
            {
                maxUnrollLength = maxUnrollLen = 1;
            }

            stackedRuntimeContext   = new NeuralNetworkContext[maxUnrollLen];
            stackedFullContext      = new NeuralNetworkFullContext[maxUnrollLen];
            stackedDerivativeMemory = new NeuralNetworkPropagationState[maxUnrollLen];
            for (int i = 0; i < maxUnrollLen; i++)
            {
                stackedRuntimeContext[i] = new NeuralNetworkContext();
                stackedRuntimeContext[i].Setup(nn);

                stackedFullContext[i] = new NeuralNetworkFullContext();
                stackedFullContext[i].Setup(nn);

                stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
                stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
                stackedDerivativeMemory[i].inputMem = new float[nn.inputLayer.numberOfNeurons];
            }
        }
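
Again an illustrative sketch with an assumed, already-configured NeuralNetwork named net. Note from the constructor above that without a recurrent hidden layer only a single stacked context is allocated, regardless of the requested unroll length.

        // unroll up to 16 time steps when the network has recurrent hidden layers
        NeuralNetworkGenerator generator = new NeuralNetworkGenerator(net, 16);
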
Example #4
        /// <summary>
        /// Propagate a single neuron's derivative backwards through the given level,
        /// accumulating weight/bias derivatives and pushing the error to the level below.
        /// </summary>
        private void backpropagate(int level, int index, float deriv, NeuralNetworkPropagationState propState)
        {
            if (level < 0)
            {
                return;
            }

            int i, weightIndex;

            float[] b, m, w;

            //recurring weights
            if (level < propState.recurrWeightMems.Length && propState.recurrWeightMems[level] != null)
            {
                b = propState.recurrBuf[level];
                m = propState.recurrWeightMems[level];
                w = propState.recurrWeights[level];

                i           = b.Length;
                weightIndex = w.Length - (index + 1) * i;
                float nhderiv = 0.0f;
                while (i-- > 0)
                {
                    m[weightIndex] += deriv * b[i];
                    nhderiv        += deriv * w[weightIndex];
                    weightIndex++;
                }

                //zero out NaN/Infinity derivatives so they cannot corrupt the buffers
                if (float.IsNaN(nhderiv) || float.IsInfinity(nhderiv))
                {
                    nhderiv = 0.0f;
                }
                propState.derivativeMemory.altRecurringBPBuffer[level][index] = nhderiv;
            }

            float[] bpb = null;

            //biases and weights
            b = propState.buf[level];
            m = propState.weightMems[level];
            w = propState.weights[level];

            if (level != 0)
            {
                bpb = propState.derivativeMemory.recurringBPBuffer[level - 1];
            }

            propState.biasMems[level][index] += deriv;

            i           = b.Length;
            weightIndex = w.Length - (index + 1) * i;
            while (i-- > 0)
            {
                float nderiv = b[i];
                m[weightIndex] += deriv * nderiv;
                if (level != 0)
                {
                    nderiv *= nderiv;

                    float bpropderiv = 0.0f;
                    if (bpb != null)
                    {
                        bpropderiv = bpb[i];
                    }

                    propState.state[level - 1][i] += (1.0f - nderiv) * (deriv * w[weightIndex] + bpropderiv);
                }
                else
                {
                    if (propState.inputMem != null)
                    {
                        nderiv *= nderiv;

                        float bpropderiv = 0.0f;
                        if (bpb != null)
                        {
                            bpropderiv = bpb[i];
                        }

                        nderiv = (1.0f - nderiv) * (deriv * w[weightIndex] + bpropderiv);
                        propState.inputMem[i] += nderiv;
                    }
                }
                weightIndex++;
            }
        }
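
The (1.0f - nderiv) factor used when pushing the derivative down a level is the activation-function derivative written in terms of the stored activation. Assuming the hidden layers use tanh (an inference from the squaring of b[i], not something this code states), the contribution added to state[level - 1][i] is

\[ (1 - a_i^2)\,(\delta \cdot w + r_i), \qquad a_i = b[i],\ \delta = deriv,\ w = w[weightIndex],\ r_i = bpb[i], \]

which follows from the chain rule and d/dz tanh(z) = 1 - tanh(z)^2 = 1 - a^2. The recurrent term r_i comes from the back-propagation buffer swapped in by SwapBPBuffers() and is treated as zero when no buffer is present.
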
Example #5
        /// <summary>
        /// Run the neural network backwards, calculating derivatives for use in AdaGrad updates or generation.
        /// </summary>
        /// <param name="target">Target output values.</param>
        /// <param name="context">Runtime context holding the forward-pass output.</param>
        /// <param name="fullContext">Full forward-pass context.</param>
        /// <param name="propState">Propagation state that receives the calculated derivatives.</param>
        /// <param name="lossType">Loss calculation type: NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, LOSS_TYPE_MAX or LOSS_TYPE_CROSSENTROPY.</param>
        /// <param name="crossEntropyTarget">Index of the target output for cross-entropy loss, or -1 when not used.</param>
        public void ExecuteBackwards(float[] target, NeuralNetworkContext context, NeuralNetworkFullContext fullContext, NeuralNetworkPropagationState propState, int lossType, int crossEntropyTarget)
        {
            //prepare for back propagation
            for (int i = 0; i < propState.state.Length; i++)
            {
                Utils.Fill(propState.state[i], 0.0f);
            }

            //back propagation + loss accumulation (max or average)
            int lid = hiddenLayers.Length;

            float lossAvg = 0.0f;

            for (int i = 0; i < target.Length; i++)
            {
                float deriv = context.outputData[i] - target[i];

                if (lossType == NeuralNetworkTrainer.LOSS_TYPE_MAX)
                {
                    float aderiv = Math.Abs(deriv);
                    if (aderiv > lossAvg)
                    {
                        lossAvg = aderiv;
                    }
                }
                else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
                {
                    lossAvg += Math.Abs(deriv);
                }

                backpropagate(lid, i, deriv, propState);
            }

            if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
            {
                lossAvg /= (float)target.Length;
            }
            else
            {
                if (lossType == NeuralNetworkTrainer.LOSS_TYPE_CROSSENTROPY && crossEntropyTarget != -1)
                {
                    lossAvg = (float)-Math.Log(context.outputData[crossEntropyTarget]);
                    if (float.IsInfinity(lossAvg))
                    {
                        lossAvg = 1e8f;
                    }
                }
            }

            propState.loss = lossAvg;
            propState.derivativeMemory.SwapBPBuffers();

            int k = lid;

            while (k-- > 0)
            {
                int l = hiddenLayers[k].numberOfNeurons;
                while (l-- > 0)
                {
                    backpropagate(k, l, propState.state[k][l], propState);
                }
            }
        }
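
For reference, the loss figures computed above, written for outputs y_i, targets t_i, N = target.Length and cross-entropy target index c (notation introduced here for clarity):

\[ L_{avg} = \frac{1}{N}\sum_i |y_i - t_i|, \qquad L_{max} = \max_i |y_i - t_i|, \qquad L_{CE} = -\log y_c \]

Whichever figure is reported, the derivative seeded into backpropagate for every output neuron is simply y_i - t_i.
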
Example #6
        /// <summary>
        /// Apply clamped derivatives to network weights/biases with a fixed learning rate (no per-parameter AdaGrad memory).
        /// </summary>
        /// <param name="derivMem">Propagation state holding the accumulated derivatives.</param>
        /// <param name="weight">Weight arrays to update.</param>
        /// <param name="bias">Bias arrays to update.</param>
        /// <param name="recurrWeight">Recurrent weight arrays to update (entries may be null).</param>
        /// <param name="learningRate">Fixed learning rate applied to every parameter.</param>
        public static void ApplyNoMemory(NeuralNetworkPropagationState derivMem, float[][] weight, float[][] bias, float[][] recurrWeight, float learningRate)
        {
            for (int i = 0; i < weight.Length; i++)
            {
                float[] f = derivMem.weightMems[i],
                w = weight[i];

                int k = f.Length;
                while (k-- > 0)
                {
                    float d = f[k];
                    if (d < -EXPLODING_GRADIENT_CLAMP)
                    {
                        d = -EXPLODING_GRADIENT_CLAMP;
                    }
                    else if (d > EXPLODING_GRADIENT_CLAMP)
                    {
                        d = EXPLODING_GRADIENT_CLAMP;
                    }

                    w[k] -= (learningRate * d);
                }

                f = derivMem.biasMems[i];
                w = bias[i];

                k = f.Length;
                while (k-- > 0)
                {
                    float d = f[k];

                    if (d < -EXPLODING_GRADIENT_CLAMP)
                    {
                        d = -EXPLODING_GRADIENT_CLAMP;
                    }
                    else
                    {
                        if (d > EXPLODING_GRADIENT_CLAMP)
                        {
                            d = EXPLODING_GRADIENT_CLAMP;
                        }
                    }

                    w[k] -= (learningRate * d);
                }

                if (recurrWeight[i] != null)
                {
                    f = derivMem.recurrWeightMems[i];
                    w = recurrWeight[i];

                    k = f.Length;
                    while (k-- > 0)
                    {
                        float d = f[k];

                        if (d < -EXPLODING_GRADIENT_CLAMP)
                        {
                            d = -EXPLODING_GRADIENT_CLAMP;
                        }
                        else
                        {
                            if (d > EXPLODING_GRADIENT_CLAMP)
                            {
                                d = EXPLODING_GRADIENT_CLAMP;
                            }
                        }

                        w[k] -= (learningRate * d);
                    }
                }
            }
        }
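
In formula form, with C = EXPLODING_GRADIENT_CLAMP, learning rate eta and accumulated derivative g, every weight, bias and recurrent weight receives the same clipped fixed-rate step:

\[ w \leftarrow w - \eta \cdot \mathrm{clip}(g, -C, C) \]
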
Example #7
        /// <summary>
        /// Accumulate squared derivatives in the AdaGrad memory and apply the per-parameter scaled update to network weights/biases.
        /// </summary>
        /// <param name="derivMem">Propagation state holding the accumulated derivatives.</param>
        /// <param name="weight">Weight arrays to update.</param>
        /// <param name="bias">Bias arrays to update.</param>
        /// <param name="recurrWeight">Recurrent weight arrays to update (entries may be null).</param>
        public void Apply(NeuralNetworkPropagationState derivMem, float[][] weight, float[][] bias, float[][] recurrWeight)
        {
            for (int i = 0; i < weights.Length; i++)
            {
                float[] t = weights[i],
                f = derivMem.weightMems[i],
                w = weight[i];

                int k = f.Length;
                while (k-- > 0)
                {
                    float m = t[k],
                          d = f[k];

                    if (d < -EXPLODING_GRADIENT_CLAMP)
                    {
                        d = -EXPLODING_GRADIENT_CLAMP;
                    }
                    else if (d > EXPLODING_GRADIENT_CLAMP)
                    {
                        d = EXPLODING_GRADIENT_CLAMP;
                    }

                    m    += d * d;
                    w[k] -= (learningRate * d) / (float)Math.Sqrt(m + SQRT_EPSILON);

                    t[k] = m;
                }

                t = biases[i];
                f = derivMem.biasMems[i];
                w = bias[i];

                k = f.Length;
                while (k-- > 0)
                {
                    float m = t[k],
                          d = f[k];

                    if (d < -EXPLODING_GRADIENT_CLAMP)
                    {
                        d = -EXPLODING_GRADIENT_CLAMP;
                    }
                    else
                    {
                        if (d > EXPLODING_GRADIENT_CLAMP)
                        {
                            d = EXPLODING_GRADIENT_CLAMP;
                        }
                    }

                    m    += d * d;
                    w[k] -= (learningRate * d) / (float)Math.Sqrt(m + SQRT_EPSILON);

                    t[k] = m;
                }

                t = i < recurringWeights.Length ? recurringWeights[i] : null;
                if (t != null)
                {
                    f = derivMem.recurrWeightMems[i];
                    w = recurrWeight[i];

                    k = f.Length;
                    while (k-- > 0)
                    {
                        float m = t[k],
                              d = f[k];

                        if (d < -EXPLODING_GRADIENT_CLAMP)
                        {
                            d = -EXPLODING_GRADIENT_CLAMP;
                        }
                        else
                        {
                            if (d > EXPLODING_GRADIENT_CLAMP)
                            {
                                d = EXPLODING_GRADIENT_CLAMP;
                            }
                        }

                        m    += d * d;
                        w[k] -= (learningRate * d) / (float)Math.Sqrt(m + SQRT_EPSILON);

                        t[k] = m;
                    }
                }
            }
        }
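
The update above is the AdaGrad rule with gradient clipping, where m is the per-parameter accumulator held in this object's weights/biases/recurringWeights arrays, C = EXPLODING_GRADIENT_CLAMP, eta = learningRate and eps = SQRT_EPSILON:

\[ g' = \mathrm{clip}(g, -C, C), \qquad m \leftarrow m + g'^2, \qquad w \leftarrow w - \frac{\eta\, g'}{\sqrt{m + \epsilon}} \]
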
Example #8
 /// <summary>
 /// Accumulate squared derivatives and apply the scaled update using the weights, biases and recurrent weights referenced by the propagation state.
 /// </summary>
 /// <param name="derivMem">Propagation state holding the accumulated derivatives and references to the parameters to update.</param>
 public void Apply(NeuralNetworkPropagationState derivMem)
 {
     Apply(derivMem, derivMem.weights, derivMem.biases, derivMem.recurrWeights);
 }