/// <summary>
/// Managed (CPU) RTLR gradient pass for one weight group.
/// For every row of <paramref name="pValuesOfWeightsA"/> the stored p-values of each U layer are
/// rewritten in place, and, when both outputs and desired outputs are supplied, an error-weighted
/// gradient is accumulated over the output layer and handed to <c>SetGradientsRTLR</c>.
/// </summary>
/// <param name="state">Computation state object; not read by this implementation.</param>
/// <param name="inputLayerInfosM">Per U layer, the descriptors of the layers feeding it.</param>
/// <param name="netValueDerivatesM">Per U layer, the derivative values used to scale the propagated sum.</param>
/// <param name="dataM">Settings of this computation (layer indexes, optional input accessor, gradient targets).</param>
/// <param name="pValuesOfWeightsA">Two dimensional p-value buffer; its first dimension drives the outer loop.</param>
/// <param name="outputsA">Current outputs, or null when no gradient has to be accumulated.</param>
/// <param name="desiredOutputsA">Desired outputs, or null when no gradient has to be accumulated.</param>
/// <param name="seqMark">Sequence marker; not read by this implementation.</param>
/// <remarks>
/// The p-values are updated in place: values written for an earlier layer/unit in this call are the
/// ones read back through the input-layer pointers later in the same call.
/// NOTE(review): the output layer is assumed to be the last U layer (index ULayersCount - 1) - confirm.
/// </remarks>
public void ComputeGradientsRTLR2(IDisposable state, Marshaled<RTLRLayerInfo[][]> inputLayerInfosM, Marshaled<IDeviceArray[]> netValueDerivatesM, Marshaled<RTLRComputationData2> dataM, IDeviceArray2 pValuesOfWeightsA, IDeviceArray outputsA, IDeviceArray desiredOutputsA, SequenceMarker seqMark)
{
    var data = dataM.Instance();
    var inputLayerInfos = inputLayerInfosM.Instance();
    var netValueDerivates = netValueDerivatesM.Instance();
    var pValuesOfWeights = pValuesOfWeightsA.ToManaged2();
    var outputs = outputsA != null ? outputsA.ToManaged() : null;
    var desiredOutputs = desiredOutputsA != null ? desiredOutputsA.ToManaged() : null;
    var inputs = data.Inputs != null ? data.Inputs().ToManaged() : null;

    // FIX: this used to be "layerNetValueDerivates.Size - 1", i.e. the number of units of the
    // current layer minus one, which was then compared against a layer index. The output layer
    // is the last layer visited by the layer loop below.
    int outputLayerIndex = data.ULayersCount - 1;
    bool hasTargets = outputs != null && desiredOutputs != null;

    fixed (float* pOutputs = outputs != null ? outputs.InternalArray : null,
                  pDesiredOutputs = desiredOutputs != null ? desiredOutputs.InternalArray : null,
                  pPValuesOfWeights = pValuesOfWeights.InternalArray,
                  pInputs = inputs != null ? inputs.InternalArray : null)
    {
        ManagedArrayPtr? outputsPtr = pOutputs != null ? outputs.ToPtr(pOutputs) : default(ManagedArrayPtr?);
        ManagedArrayPtr? desiredOutputsPtr = pDesiredOutputs != null ? desiredOutputs.ToPtr(pDesiredOutputs) : default(ManagedArrayPtr?);
        ManagedArrayPtr? inputsPtr = pInputs != null ? inputs.ToPtr(pInputs) : default(ManagedArrayPtr?);

        // Without inputs (bias case) every row maps to its own unit and the input value is 1.
        int inputsSize = inputs == null ? 1 : inputs.Size;

        for (int ijValueIndex = 0; ijValueIndex < pValuesOfWeights.Size1; ijValueIndex++) // group Id
        {
            float gradient = 0.0f;

            // The flat row index encodes (unit of layer i, input unit j).
            int iValueIndex = ijValueIndex / inputsSize;
            int jValueIndex = ijValueIndex % inputsSize;
            float inputValue = inputsPtr.HasValue ? inputsPtr.Value[jValueIndex] : 1.0f;

            for (int kLayerIndex = 0; kLayerIndex < data.ULayersCount; kLayerIndex++)
            {
                int kLayerSize = netValueDerivates[kLayerIndex].Size;

                // Everything below depends only on the layer, not on the unit, so it is
                // resolved once per layer instead of once per unit.
                var layerNetValueDerivates = netValueDerivates[kLayerIndex].ToManaged();
                bool computeGradient = hasTargets && kLayerIndex == outputLayerIndex;
                var p_i_j_k_Ptr = GetPValuesPtr(pValuesOfWeights, pPValuesOfWeights, ijValueIndex, data, kLayerIndex);
                var upperInfos_k = inputLayerInfos[kLayerIndex];

                fixed (float* pLayerNetValueDerivates = layerNetValueDerivates.InternalArray)
                {
                    for (int kValueIndex = 0; kValueIndex < kLayerSize; kValueIndex++)
                    {
                        float sum = 0.0f;

                        // Weighted sum of the p-values of every U layer feeding unit k.
                        foreach (var lLayerInfo in upperInfos_k)
                        {
                            if (lLayerInfo.IsElementOfU)
                            {
                                Debug.Assert(lLayerInfo.Weights != null);

                                int lLayerIndex = lLayerInfo.Index;
                                var p_i_j_l_Ptr = GetPValuesPtr(pValuesOfWeights, pPValuesOfWeights, ijValueIndex, data, lLayerIndex);
                                var weights = lLayerInfo.Weights.ToManaged2();

                                fixed (float* pWeights = weights.InternalArray)
                                {
                                    var weightsPtr = weights.ToPtr2(pWeights);
                                    for (int lValueIndex = 0; lValueIndex < lLayerInfo.Size; lValueIndex++)
                                    {
                                        sum += weightsPtr[lValueIndex, kValueIndex] * p_i_j_l_Ptr[lValueIndex];
                                    }
                                }
                            }
                        }

                        // Direct term: only the unit that owns the weight receives the input value.
                        if (data.ILayerIndex == kLayerIndex && iValueIndex == kValueIndex)
                        {
                            sum += inputValue;
                        }

                        p_i_j_k_Ptr[kValueIndex] = layerNetValueDerivates.ToPtr(pLayerNetValueDerivates)[kValueIndex] * sum;

                        if (computeGradient)
                        {
                            Debug.Assert(outputsPtr.HasValue && desiredOutputsPtr.HasValue);
                            gradient += (desiredOutputsPtr.Value[kValueIndex] - outputsPtr.Value[kValueIndex]) * p_i_j_k_Ptr[kValueIndex];
                        }
                    }
                }
            }

            SetGradientsRTLR(data, ijValueIndex, gradient);
        }
    }
}
/// <summary>
/// Runs the RTLR gradient computation stored under <paramref name="computationIndex"/>.
/// On first use of an index beyond the current cache size the computation delegate is built,
/// cached in <c>codes</c> and invoked immediately.
/// </summary>
/// <param name="iLayerIndex">Index of the layer whose parameters are differentiated; <c>mlp.Layers</c> and the gradient lookups are addressed with this value plus one.</param>
/// <param name="jLayerIndex">0 selects the bias; 1 and above select the weights coming from input layer <c>jLayerIndex - 1</c>.</param>
/// <param name="pValuesOfWeights">P-value buffer captured by the delegate when it is built.</param>
/// <param name="outputs">Outputs passed to the delegate on every invocation.</param>
/// <param name="desiredOutputs">Desired outputs passed to the delegate on every invocation.</param>
/// <param name="computationIndex">Slot of this computation in the <c>codes</c> cache.</param>
/// <param name="seqMark">Sequence marker captured by the delegate when it is built.</param>
/// <remarks>
/// A slot that already exists but holds null is skipped without building anything.
/// A cached delegate keeps the <paramref name="pValuesOfWeights"/> and <paramref name="seqMark"/>
/// of the call that created it; later calls only supply the output arrays.
/// </remarks>
private void ComputeGradients(int iLayerIndex, int jLayerIndex, IDeviceArray2 pValuesOfWeights, IDeviceArray outputs, IDeviceArray desiredOutputs, int computationIndex, SequenceMarker seqMark)
{
    // Fast path: the slot already exists, so run whatever is cached there (if anything).
    if (computationIndex < codes.Count)
    {
        var cached = codes[computationIndex];
        if (cached != null)
        {
            cached(outputs, desiredOutputs);
        }
        return;
    }

    // Slow path: reserve the slot, describe the computation, then cache and run it.
    codes.EnsureSize(computationIndex + 1);

    bool isBias = jLayerIndex == 0;
    int layerSlot = iLayerIndex + 1;

    var dataM = mlp.AsMarshaled(new RTLRComputationData2());
    var data = dataM.ManagedObject;
    data.MaxULayerSize = maxULayerSize;
    data.ULayersCount = uLayersCount;
    var targetLayer = mlp.Layers[layerSlot];
    data.ILayerIndex = iLayerIndex;
    data.JLayerIndex = jLayerIndex;

    if (!isBias)
    {
        // Weights: gradients are keyed by (source layer index, target layer slot).
        Debug.Assert(jLayerIndex > 0);

        var sourceLayer = targetLayer.Layer.GetInputLayer(jLayerIndex - 1);
        var sourceLayerIndex = mlp.GetLayerIndex(sourceLayer);
        var weightKey = Tuple.Create(sourceLayerIndex, layerSlot);

        data.Inputs = () => mlp.GetNetValues(sourceLayerIndex);
        data.Gradients = mlp.GetGradients(weightKey);
        data.GradientSums = mlp.GetGradientSums(weightKey);
    }
    else
    {
        // Bias: no input accessor is set.
        Debug.Assert(jLayerIndex == 0);

        data.BiasGradients = mlp.GetBiasGradients(layerSlot);
        data.BiasGradientSums = mlp.GetBiasGradientSums(layerSlot);
    }

    // At least one gradient target has to be present.
    Debug.Assert(data.BiasGradients != null || data.BiasGradientSums != null || data.Gradients != null || data.GradientSums != null);

    var state = mlp.CreateComputationState();
    Action<IDeviceArray, IDeviceArray> compute =
        (os, dos) => mlp.Adapter.ComputeActivation.ComputeGradientsRTLR2(state, inputLayerInfos, netValueDerivates, dataM, pValuesOfWeights, os, dos, seqMark);

    codes[computationIndex] = compute;
    compute(outputs, desiredOutputs);
}