// RTLR forward pass: computes the layer's activations and caches the
// derivative of the activation w.r.t. the net value, which the recurrent
// gradient computation needs later.
unsafe public void ComputeForwardRTLR(IDisposable state, Marshaled<DeviceArrayFactory[]> inputsM, Marshaled<IDeviceArray2[]> weightsM, IDeviceArray biases, IDeviceArray outputs, IDeviceArray netValueDerivates, ActivationFunction function, float alpha)
{
    var inputs = inputsM.Instance();
    var weights = weightsM.Instance();
    Debug.Assert(inputs.Length != 0 && inputs.Length == weights.Length);

    var mOutputs = outputs.ToManaged();
    var mNVDerivs = netValueDerivates.ToManaged();
    var mBiases = (ManagedArray)biases;

    fixed (float* pOutputs = mOutputs.InternalArray, pBiases = mBiases.InternalArray, pNVDerivs = mNVDerivs.InternalArray)
    {
        var outputsPtr = mOutputs.ToPtr(pOutputs);
        var biasesPtr = mBiases.ToPtr(pBiases);
        var nvDerivsPtr = mNVDerivs.ToPtr(pNVDerivs);

        for (int oIdx = 0; oIdx < outputs.Size; oIdx++)
        {
            // Net value: bias plus the weighted sums from every connected input layer.
            float sum = biasesPtr[oIdx];
            for (int lIdx = 0; lIdx < inputs.Length; lIdx++)
            {
                var inputsMA = (inputs[lIdx]()).ToManaged();
                var weightsMA = (ManagedArray2)weights[lIdx];
                Debug.Assert(inputsMA.Size != 0 && inputsMA.Size == weightsMA.Size1);
                Debug.Assert(outputs.Size == weightsMA.Size2);

                fixed (float* pInputs = inputsMA.InternalArray, pWeights = weightsMA.InternalArray)
                {
                    sum += ComputeForward_Sum(inputsMA.ToPtr(pInputs), weightsMA.ToPtr2(pWeights), oIdx);
                }
            }

            if (function == ActivationFunction.Sigmoid)
            {
                outputsPtr[oIdx] = Sigmoid(sum, alpha);
                nvDerivsPtr[oIdx] = SigmoidD(sum, alpha);
            }
            else
            {
                // Linear activation clipped to [-alpha, alpha]; its derivative is alpha.
                outputsPtr[oIdx] = Math.Min(Math.Max(sum * alpha, -alpha), alpha);
                nvDerivsPtr[oIdx] = alpha;
            }
        }
    }
}
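// The helpers called above (ComputeForward_Sum, Sigmoid, SigmoidD) are
// defined elsewhere in the adapter. The _Sketch variants below are a minimal,
// hypothetical reconstruction for readability only: they assume row-major
// [input, output] weight storage and an alpha-scaled logistic sigmoid, which
// may differ from the actual implementations.
private static unsafe float ComputeForward_Sum_Sketch(float* inputs, int inputCount, float* weights, int outputCount, int oIdx)
{
    // Dot product of the input vector with column oIdx of the weight matrix.
    float sum = 0.0f;
    for (int i = 0; i < inputCount; i++) sum += inputs[i] * weights[i * outputCount + oIdx];
    return sum;
}

private static float Sigmoid_Sketch(float value, float alpha)
{
    return 1.0f / (1.0f + (float)Math.Exp(-alpha * value)); // assumed logistic form
}

private static float SigmoidD_Sketch(float value, float alpha)
{
    // Derivative of the assumed logistic, expressed in terms of the net value.
    float s = Sigmoid_Sketch(value, alpha);
    return alpha * s * (1.0f - s);
}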
// RTLR gradient step for a single (i, j) weight: propagates the P-value
// buffers one step and accumulates the output-layer gradient when outputs
// and desired outputs are available (i.e., at the end of a sequence).
unsafe public void ComputeGradientsRTLR(IDisposable state, Marshaled<RTLRLayerInfo[][]> inputLayerInfosM, Marshaled<IDeviceArray[]> netValueDerivatesM, Marshaled<RTLRComputationData> dataM, Marshaled<IDeviceArray[]> valueRelatedPBuffsM, IDeviceArray outputsA, IDeviceArray desiredOutputsA, SequenceMarker seqMark)
{
    var data = dataM.Instance();
    var inputLayerInfos = inputLayerInfosM.Instance();
    var netValueDerivates = netValueDerivatesM.Instance();
    var outputs = outputsA != null ? outputsA.ToManaged() : null;
    var desiredOutputs = desiredOutputsA != null ? desiredOutputsA.ToManaged() : null;
    var inputs = data.Inputs != null ? data.Inputs().ToManaged() : null;
    var valueRelatedPBuffs = valueRelatedPBuffsM.Instance();

    float gradient = 0.0f;

    fixed (float* pOutputs = outputs != null ? outputs.InternalArray : null, pDesiredOutputs = desiredOutputs != null ? desiredOutputs.InternalArray : null)
    {
        // For a weighted connection the input value is read from the input
        // layer; for a bias connection it is the constant 1.
        float inputValue = inputs != null ? inputs.InternalArray[data.JValueIndex] : 1.0f;

        int outputLayerIndex = valueRelatedPBuffs.Length - 1;
        for (int kLayerIndex = 0; kLayerIndex < valueRelatedPBuffs.Length; kLayerIndex++)
        {
            var layerNetValueDerivates = netValueDerivates[kLayerIndex].ToManaged();
            var p_i_j_k_Values = valueRelatedPBuffs[kLayerIndex].ToManaged();

            bool computeGradient = kLayerIndex == outputLayerIndex && pOutputs != null && pDesiredOutputs != null;

            fixed (float* pLayerNetValueDerivates = layerNetValueDerivates.InternalArray, pp_i_j_k_Values = p_i_j_k_Values.InternalArray)
            {
                var layerNetValueDerivatesPtr = layerNetValueDerivates.ToPtr(pLayerNetValueDerivates);
                var p_i_j_k_ValuesPtr = p_i_j_k_Values.ToPtr(pp_i_j_k_Values);

                for (int kValueIndex = 0; kValueIndex < p_i_j_k_Values.Size; kValueIndex++)
                {
                    // i: iLayerIndex, iValueIndex
                    // j: jLayerIndex, jValueIndex
                    // k: kLayerIndex, kValueIndex
                    float sum = 0.0f;

                    var upperInfos_k = inputLayerInfos[kLayerIndex];
                    foreach (var upperInputLayerInfo in upperInfos_k)
                    {
                        if (upperInputLayerInfo.IsElementOfU)
                        {
                            Debug.Assert(upperInputLayerInfo.Weights != null);

                            int lLayerIndex = upperInputLayerInfo.Index;
                            var p_i_j_l_Values = valueRelatedPBuffs[lLayerIndex].ToManaged();
                            var weights = upperInputLayerInfo.Weights.ToManaged2();

                            Debug.Assert(p_i_j_l_Values.Size == weights.Size1);
                            Debug.Assert(weights.Size2 == p_i_j_k_Values.Size);

                            fixed (float* pp_i_j_l = p_i_j_l_Values.InternalArray, pWeights = weights.InternalArray)
                            {
                                var p_i_j_l_ValuesPtr = p_i_j_l_Values.ToPtr(pp_i_j_l);
                                var weightsPtr = weights.ToPtr2(pWeights);

                                for (int lValueIndex = 0; lValueIndex < p_i_j_l_Values.Size; lValueIndex++)
                                {
                                    // l: lLayerIndex, lValueIndex
                                    sum += weightsPtr[lValueIndex, kValueIndex] * p_i_j_l_ValuesPtr[lValueIndex];
                                }
                            }
                        }
                    }

                    if (data.ILayerIndex == kLayerIndex && data.IValueIndex == kValueIndex) sum += inputValue;

                    p_i_j_k_ValuesPtr[kValueIndex] = layerNetValueDerivatesPtr[kValueIndex] * sum;

                    if (computeGradient)
                    {
                        var outputsPtr = outputs.ToPtr(pOutputs);
                        var desiredOutputsPtr = desiredOutputs.ToPtr(pDesiredOutputs);
                        gradient += (desiredOutputsPtr[kValueIndex] - outputsPtr[kValueIndex]) * p_i_j_k_ValuesPtr[kValueIndex];
                    }
                }
            }
        }
    }

    if (gradient != 0.0f) SetGradientsRTLR(data, gradient);
}
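// The loop above is the classic RTLR sensitivity recurrence. For the weight
// w_ij and each unit k of the recurrent set U:
//
//   p_ij^k(t+1) = f'(net_k) * ( sum over l in U of w_lk * p_ij^l(t)
//                               + delta_ik * x_j(t) )
//
// where delta_ik adds the input value only to the unit the weight feeds, and
// x_j is 1 for a bias connection. At the output layer the gradient for w_ij
// accumulates (desired_k - output_k) * p_ij^k, i.e., dE/dw_ij.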
// Vectorized RTLR gradient step: each row of pValuesOfWeights holds the
// P values of one (i, j) weight, so a whole group of weights is processed
// per call instead of one weight at a time.
unsafe public void ComputeGradientsRTLR2(IDisposable state, Marshaled<RTLRLayerInfo[][]> inputLayerInfosM, Marshaled<IDeviceArray[]> netValueDerivatesM, Marshaled<RTLRComputationData2> dataM, IDeviceArray2 pValuesOfWeightsA, IDeviceArray outputsA, IDeviceArray desiredOutputsA, SequenceMarker seqMark)
{
    var data = dataM.Instance();
    var inputLayerInfos = inputLayerInfosM.Instance();
    var netValueDerivates = netValueDerivatesM.Instance();
    var pValuesOfWeights = pValuesOfWeightsA.ToManaged2();
    var outputs = outputsA != null ? outputsA.ToManaged() : null;
    var desiredOutputs = desiredOutputsA != null ? desiredOutputsA.ToManaged() : null;
    var inputs = data.Inputs != null ? data.Inputs().ToManaged() : null;

    fixed (float* pOutputs = outputs != null ? outputs.InternalArray : null,
        pDesiredOutputs = desiredOutputs != null ? desiredOutputs.InternalArray : null,
        pPValuesOfWeights = pValuesOfWeights.InternalArray,
        pInputs = inputs != null ? inputs.InternalArray : null)
    {
        ManagedArrayPtr? outputsPtr = pOutputs != null ? outputs.ToPtr(pOutputs) : default(ManagedArrayPtr?);
        ManagedArrayPtr? desiredOutputsPtr = pDesiredOutputs != null ? desiredOutputs.ToPtr(pDesiredOutputs) : default(ManagedArrayPtr?);
        ManagedArrayPtr? inputsPtr = pInputs != null ? inputs.ToPtr(pInputs) : default(ManagedArrayPtr?);

        int inputsSize = inputs == null ? 1 : inputs.Size;
        // The output layer is the last of the U layers.
        int outputLayerIndex = netValueDerivates.Length - 1;

        for (int ijValueIndex = 0; ijValueIndex < pValuesOfWeights.Size1; ijValueIndex++) // group Id
        {
            float gradient = 0.0f;

            int iValueIndex = ijValueIndex / inputsSize;
            int jValueIndex = ijValueIndex % inputsSize;
            // For a weighted connection the input value comes from the input
            // layer; for a bias connection it is the constant 1.
            float inputValue = inputsPtr.HasValue ? inputsPtr.Value[jValueIndex] : 1.0f;

            for (int kLayerIndex = 0; kLayerIndex < data.ULayersCount; kLayerIndex++)
            {
                var layerNetValueDerivates = netValueDerivates[kLayerIndex].ToManaged();
                int kLayerSize = layerNetValueDerivates.Size;
                bool computeGradient = kLayerIndex == outputLayerIndex && outputs != null && desiredOutputs != null;
                var p_i_j_k_Ptr = GetPValuesPtr(pValuesOfWeights, pPValuesOfWeights, ijValueIndex, data, kLayerIndex);

                for (int kValueIndex = 0; kValueIndex < kLayerSize; kValueIndex++)
                {
                    float sum = 0.0f;

                    var upperInfos_k = inputLayerInfos[kLayerIndex];
                    foreach (var lLayerInfo in upperInfos_k)
                    {
                        if (lLayerInfo.IsElementOfU)
                        {
                            Debug.Assert(lLayerInfo.Weights != null);

                            int lLayerIndex = lLayerInfo.Index;
                            var p_i_j_l_Ptr = GetPValuesPtr(pValuesOfWeights, pPValuesOfWeights, ijValueIndex, data, lLayerIndex);
                            var weights = lLayerInfo.Weights.ToManaged2();

                            fixed (float* pWeights = weights.InternalArray)
                            {
                                var weightsPtr = weights.ToPtr2(pWeights);
                                for (int lValueIndex = 0; lValueIndex < lLayerInfo.Size; lValueIndex++)
                                {
                                    sum += weightsPtr[lValueIndex, kValueIndex] * p_i_j_l_Ptr[lValueIndex];
                                }
                            }
                        }
                    }

                    if (data.ILayerIndex == kLayerIndex && iValueIndex == kValueIndex) sum += inputValue;

                    fixed (float* pLayerNetValueDerivates = layerNetValueDerivates.InternalArray)
                    {
                        p_i_j_k_Ptr[kValueIndex] = layerNetValueDerivates.ToPtr(pLayerNetValueDerivates)[kValueIndex] * sum;
                    }

                    if (computeGradient)
                    {
                        Debug.Assert(outputsPtr.HasValue && desiredOutputsPtr.HasValue);
                        gradient += (desiredOutputsPtr.Value[kValueIndex] - outputsPtr.Value[kValueIndex]) * p_i_j_k_Ptr[kValueIndex];
                    }
                }
            }

            SetGradientsRTLR(data, ijValueIndex, gradient);
        }
    }
}
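// GetPValuesPtr is not shown here. A hypothetical sketch of what the code
// above assumes: row ijValueIndex of the row-major pValuesOfWeights matrix
// holds the P values of all U layers back to back, so the pointer for layer
// kLayerIndex is the row start advanced by the sizes of the preceding layers
// (layerStartOffset; presumably precomputed in RTLRComputationData2).
private static unsafe float* GetPValuesPtr_Sketch(ManagedArray2 pValuesOfWeights, float* pPValuesOfWeights, int ijValueIndex, int layerStartOffset)
{
    return pPValuesOfWeights + ijValueIndex * pValuesOfWeights.Size2 + layerStartOffset;
}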
// BPTT phase 1: accumulates the current step's error terms into the online
// gradients (bias gradients get the raw errors, weight gradients the
// input-weighted errors).
unsafe public void ComputeGradientsBPTTPhase1(IDisposable state, Marshaled<DeviceArrayFactory[]> inputsM, Marshaled<IDeviceArray2[]> gradientsM, IDeviceArray biasGradients, IDeviceArray errors)
{
    var gradients = gradientsM.Instance();
    var inputs = inputsM.Instance();
    Debug.Assert(gradients != null && biasGradients != null);

    var mErrors = (ManagedArray)errors;
    var mBiasGradients = (ManagedArray)biasGradients;

    fixed (float* pErrors = mErrors.InternalArray, pBiasGradients = mBiasGradients.InternalArray)
    {
        var errorsPtr = mErrors.ToPtr(pErrors);
        var biasGradientsPtr = mBiasGradients.ToPtr(pBiasGradients);

        for (int eIdx = 0; eIdx < errors.Size; eIdx++)
        {
            biasGradientsPtr[eIdx] += errorsPtr[eIdx];

            for (int lIdx = 0; lIdx < inputs.Length; lIdx++)
            {
                var inputsMA = (inputs[lIdx]()).ToManaged();
                var gradientsMA = (ManagedArray2)gradients[lIdx];
                fixed (float* pi = inputsMA.InternalArray, pg = gradientsMA.InternalArray)
                {
                    ComputeGradients_AddGradients(inputsMA.ToPtr(pi), gradientsMA.ToPtr2(pg), errorsPtr, eIdx);
                }
            }
        }
    }
}
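// ComputeGradients_AddGradients is defined elsewhere; a hypothetical sketch
// of the accumulation it performs (raw-pointer signature assumed): for the
// error unit eIdx it adds input_i * error_e to gradient[i, e], one column of
// the outer product of the inputs with the errors.
private static unsafe void ComputeGradients_AddGradients_Sketch(float* inputs, int inputCount, float* gradients, int errorCount, float* errors, int eIdx)
{
    float err = errors[eIdx];
    for (int i = 0; i < inputCount; i++) gradients[i * errorCount + eIdx] += inputs[i] * err; // row-major [input, output]
}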
// BPTT phase 2: closes an unfolded sequence by adding the last error terms,
// averaging the accumulated gradients over the iteration count, and, in
// offline mode, accumulating the averaged gradients into the gradient sums.
unsafe public void ComputeGradientsBPTTPhase2(IDisposable state, Marshaled<DeviceArrayFactory[]> inputsM, Marshaled<IDeviceArray2[]> gradientsM, IDeviceArray biasGradients, Marshaled<IDeviceArray2[]> gradientSumsM, IDeviceArray biasGradientSums, IDeviceArray errors, int intItCount)
{
    var gradients = gradientsM.Instance();
    var gradientSums = gradientSumsM.Instance();
    var inputs = inputsM.Instance();
    Debug.Assert(gradients != null && biasGradients != null);

    bool offline = gradientSums != null && biasGradientSums != null;
    float by = intItCount; // number of internal iterations to average over

    var mErrors = (ManagedArray)errors;
    var mBiasGradients = (ManagedArray)biasGradients;
    var mBiasGradientSums = (ManagedArray)biasGradientSums;

    fixed (float* pErrors = mErrors.InternalArray, pBiasGradients = mBiasGradients.InternalArray, pBiasGradientSums = offline ? mBiasGradientSums.InternalArray : null)
    {
        var errorsPtr = mErrors.ToPtr(pErrors);
        var biasGradientsPtr = mBiasGradients.ToPtr(pBiasGradients);
        var biasGradientSumsPtr = offline ? mBiasGradientSums.ToPtr(pBiasGradientSums) : ManagedArrayPtr.Null;

        for (int eIdx = 0; eIdx < errors.Size; eIdx++)
        {
            biasGradientsPtr[eIdx] += errorsPtr[eIdx];
            biasGradientsPtr[eIdx] /= by;
            if (offline) biasGradientSumsPtr[eIdx] += biasGradientsPtr[eIdx];

            for (int lIdx = 0; lIdx < inputs.Length; lIdx++)
            {
                var inputsMA = (inputs[lIdx]()).ToManaged();
                var gradientsMA = (ManagedArray2)gradients[lIdx];

                if (offline)
                {
                    var gradientSumsMA = (ManagedArray2)gradientSums[lIdx];
                    fixed (float* pi = inputsMA.InternalArray, pg = gradientsMA.InternalArray, pgs = gradientSumsMA.InternalArray)
                    {
                        ComputeGradients_AddDivAddGradients(inputsMA.ToPtr(pi), gradientsMA.ToPtr2(pg), gradientSumsMA.ToPtr2(pgs), errorsPtr, eIdx, by);
                    }
                }
                else
                {
                    fixed (float* pi = inputsMA.InternalArray, pg = gradientsMA.InternalArray)
                    {
                        ComputeGradients_AddDivGradients(inputsMA.ToPtr(pi), gradientsMA.ToPtr2(pg), errorsPtr, eIdx, by);
                    }
                }
            }
        }
    }
}
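// A hypothetical sketch of ComputeGradients_AddDivAddGradients, mirroring
// what the bias handling above does per weight: add the final error term,
// average over the iteration count, then accumulate into the offline sums.
private static unsafe void ComputeGradients_AddDivAddGradients_Sketch(float* inputs, int inputCount, float* gradients, float* gradientSums, int errorCount, float* errors, int eIdx, float by)
{
    float err = errors[eIdx];
    for (int i = 0; i < inputCount; i++)
    {
        int w = i * errorCount + eIdx;                        // row-major [input, output]
        gradients[w] = (gradients[w] + inputs[i] * err) / by; // add, then average
        gradientSums[w] += gradients[w];                      // offline accumulation
    }
}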
// Backpropagates errors from the connected lower layers: each error is the
// weighted sum of the lower errors times the derivative of this layer's
// activation.
unsafe public void ComputeErrors(IDisposable state, IDeviceArray outputs, IDeviceArray errors, Marshaled<IDeviceArray2[]> lowerWeightsM, Marshaled<IDeviceArray[]> lowerErrorsM, ActivationFunction function, float alpha)
{
    var lowerWeights = lowerWeightsM.Instance();
    var lowerErrors = lowerErrorsM.Instance();
    var mOutputs = outputs.ToManaged();
    var mErrors = (ManagedArray)errors;
    Debug.Assert(lowerWeights.Length != 0 && lowerWeights.Length == lowerErrors.Length);

    fixed (float* pOutputs = mOutputs.InternalArray, pErrors = mErrors.InternalArray)
    {
        var outputsPtr = mOutputs.ToPtr(pOutputs);
        var errorsPtr = mErrors.ToPtr(pErrors);

        for (int oIdx = 0; oIdx < outputs.Size; oIdx++)
        {
            float sum = 0.0f;
            for (int lIdx = 0; lIdx < lowerErrors.Length; lIdx++)
            {
                var lowerWeightsMA = (ManagedArray2)lowerWeights[lIdx];
                var lowerErrorsMA = (ManagedArray)lowerErrors[lIdx];
                Debug.Assert(lowerWeightsMA.Size2 == lowerErrorsMA.Size);
                Debug.Assert(lowerWeightsMA.Size1 == outputs.Size);

                fixed (float* pLowerWeights = lowerWeightsMA.InternalArray, pLowerErrors = lowerErrorsMA.InternalArray)
                {
                    sum += ComputeErrors_LowerErrorSum(lowerErrorsMA.ToPtr(pLowerErrors), lowerWeightsMA.ToPtr2(pLowerWeights), oIdx);
                }
            }

            // Sigmoid derivative is evaluated on the stored output; the clipped
            // linear activation has the constant derivative alpha.
            errorsPtr[oIdx] = function == ActivationFunction.Sigmoid ? sum * SigmoidD(outputsPtr[oIdx], alpha) : sum * alpha;
        }
    }
}
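// ComputeErrors_LowerErrorSum is defined elsewhere; a hypothetical raw-pointer
// sketch of the backpropagation sum it computes: the dot product of the lower
// layer's errors with row oIdx of the connecting weight matrix
// (Size1 = this layer's size, Size2 = lower layer's size, row-major assumed).
private static unsafe float ComputeErrors_LowerErrorSum_Sketch(float* lowerErrors, int lowerErrorCount, float* lowerWeights, int oIdx)
{
    float sum = 0.0f;
    for (int e = 0; e < lowerErrorCount; e++) sum += lowerErrors[e] * lowerWeights[oIdx * lowerErrorCount + e];
    return sum;
}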
// Feed-forward gradient computation: writes the online gradients and/or
// accumulates the offline gradient sums, depending on which buffers the
// caller supplied.
unsafe public void ComputeGradientsFF(IDisposable state, Marshaled<DeviceArrayFactory[]> inputsM, Marshaled<IDeviceArray2[]> gradientsM, IDeviceArray biasGradients, Marshaled<IDeviceArray2[]> gradientSumsM, IDeviceArray biasGradientSums, IDeviceArray errors)
{
    var gradients = gradientsM.Instance();
    var gradientSums = gradientSumsM.Instance();
    var inputs = inputsM.Instance();

    bool online = gradients != null && biasGradients != null;
    bool offline = gradientSums != null && biasGradientSums != null;

    var mErrors = (ManagedArray)errors;
    var mBiasGradients = (ManagedArray)biasGradients;
    var mBiasGradientSums = (ManagedArray)biasGradientSums;

    fixed (float* pErrors = mErrors.InternalArray, pBiasGradients = online ? mBiasGradients.InternalArray : null, pBiasGradientSums = offline ? mBiasGradientSums.InternalArray : null)
    {
        var errorsPtr = mErrors.ToPtr(pErrors);
        var biasGradientsPtr = online ? mBiasGradients.ToPtr(pBiasGradients) : ManagedArrayPtr.Null;
        var biasGradientSumsPtr = offline ? mBiasGradientSums.ToPtr(pBiasGradientSums) : ManagedArrayPtr.Null;

        for (int eIdx = 0; eIdx < errors.Size; eIdx++)
        {
            if (online) biasGradientsPtr[eIdx] = errorsPtr[eIdx];
            if (offline) biasGradientSumsPtr[eIdx] += errorsPtr[eIdx];

            for (int lIdx = 0; lIdx < inputs.Length; lIdx++)
            {
                var inputsMA = (inputs[lIdx]()).ToManaged();

                if (online && offline)
                {
                    var gradientsMA = (ManagedArray2)gradients[lIdx];
                    var gradientSumsMA = (ManagedArray2)gradientSums[lIdx];
                    fixed (float* pi = inputsMA.InternalArray, pg = gradientsMA.InternalArray, pgs = gradientSumsMA.InternalArray)
                    {
                        ComputeGradients_SetAndAddGradients(inputsMA.ToPtr(pi), gradientsMA.ToPtr2(pg), gradientSumsMA.ToPtr2(pgs), errorsPtr, eIdx);
                    }
                }
                else if (online)
                {
                    var gradientsMA = (ManagedArray2)gradients[lIdx];
                    fixed (float* pi = inputsMA.InternalArray, pg = gradientsMA.InternalArray)
                    {
                        ComputeGradients_SetGradients(inputsMA.ToPtr(pi), gradientsMA.ToPtr2(pg), errorsPtr, eIdx);
                    }
                }
                else
                {
                    Debug.Assert(offline);
                    var gradientSumsMA = (ManagedArray2)gradientSums[lIdx];
                    fixed (float* pi = inputsMA.InternalArray, pgs = gradientSumsMA.InternalArray)
                    {
                        ComputeGradients_AddGradients(inputsMA.ToPtr(pi), gradientSumsMA.ToPtr2(pgs), errorsPtr, eIdx);
                    }
                }
            }
        }
    }
}
private void Free(Marshaled<IDeviceArray[]> p)
{
    ResourceManager.Free(p.Instance());
}
// Builds (on first use) and runs the cached computation that updates the
// P buffers and gradients of a single (i, j) weight. The closure captures
// everything that is constant across calls, so subsequent invocations only
// pass the current outputs and desired outputs.
private void ComputeGradients(int computationIndex, Marshaled<IDeviceArray[]> valueRelatedPBuffs, int iLayerIndex, int iValueIndex, int jLayerIndex, int jValueIndex, int ijValueIndex, IDeviceArray outputs, IDeviceArray desiredOutputs, SequenceMarker seqMark)
{
#if DEBUG
    int outputLayerIndex = valueRelatedPBuffs.Instance().Length - 1;
    Debug.Assert(outputLayerIndex == mlp.Layers.Count - 2);
    Debug.Assert(outputLayerIndex == mlp.Layers[mlp.Layers.Count - 1].Index - 1);
#endif

    if (codes.Count > computationIndex)
    {
        // Already compiled for this index; just run it.
        var code = codes[computationIndex];
        if (code != null) code(outputs, desiredOutputs);
    }
    else
    {
        codes.EnsureSize(computationIndex + 1);

        Action<IDeviceArray, IDeviceArray> code = null;
        bool forBias = jValueIndex == -1;
        var dataM = mlp.AsMarshaled(new RTLRComputationData());
        var data = dataM.ManagedObject;
        int iLayerIndexN = iLayerIndex + 1;
        var iLayer = mlp.Layers[iLayerIndexN];

        data.ILayerIndex = iLayerIndex;
        data.IValueIndex = iValueIndex;
        data.JLayerIndex = jLayerIndex;
        data.JValueIndex = jValueIndex;
        data.IJValueIndex = ijValueIndex;

        if (forBias)
        {
            Debug.Assert(jLayerIndex == 0);
            data.BiasGradients = mlp.GetBiasGradients(iLayerIndexN);
            data.BiasGradientSums = mlp.GetBiasGradientSums(iLayerIndexN);
        }
        else
        {
            Debug.Assert(jLayerIndex > 0);
            var inputLayerOfILayer = iLayer.Layer.GetInputLayer(jLayerIndex - 1);
            var inputLayerOfILayerIndex = mlp.GetLayerIndex(inputLayerOfILayer);
            var weightKey = Tuple.Create(inputLayerOfILayerIndex, iLayerIndexN);
            data.Inputs = () => mlp.GetNetValues(inputLayerOfILayerIndex);
            data.Gradients = mlp.GetGradients(weightKey);
            data.GradientSums = mlp.GetGradientSums(weightKey);
        }

        Debug.Assert(!(data.BiasGradients == null && data.BiasGradientSums == null && data.Gradients == null && data.GradientSums == null));

        var state = mlp.CreateComputationState();
        code = (os, dos) => mlp.Adapter.ComputeActivation.ComputeGradientsRTLR(state, inputLayerInfos, netValueDerivates, dataM, valueRelatedPBuffs, os, dos, seqMark);
        codes[computationIndex] = code;
        code(outputs, desiredOutputs);
    }
}
// Older single-weight variant of the RTLR gradient step; the layer infos and
// net value derivatives travel inside RTLRComputationData instead of being
// passed as separate arguments.
unsafe public void ComputeGradientsRTLR(Marshaled<RTLRComputationData> dataM, Marshaled<IDeviceArray[]> valueRelatedPBuffsM, IDeviceArray outputsA, IDeviceArray desiredOutputsA)
{
    var data = dataM.Instance();
    var outputs = outputsA != null ? outputsA.ToManaged() : null;
    var desiredOutputs = desiredOutputsA != null ? desiredOutputsA.ToManaged() : null;
    var inputs = data.Inputs != null ? data.Inputs().ToManaged() : null;
    var valueRelatedPBuffs = valueRelatedPBuffsM.Instance();

    float gradient = 0.0f;

    fixed (float* pOutputs = outputs != null ? outputs.InternalArray : null, pDesiredOutputs = desiredOutputs != null ? desiredOutputs.InternalArray : null, pInputs = inputs != null ? inputs.InternalArray : null)
    {
        int outputLayerIndex = valueRelatedPBuffs.Length - 1;
        for (int kLayerIndex = 0; kLayerIndex < valueRelatedPBuffs.Length; kLayerIndex++)
        {
            var layerNetValueDerivates = data.NetValueDerivates[kLayerIndex].ToManaged();
            var p_i_j_k_Values = valueRelatedPBuffs[kLayerIndex].ToManaged();

            bool computeGradient = kLayerIndex == outputLayerIndex && pOutputs != null && pDesiredOutputs != null;

            fixed (float* pLayerNetValueDerivates = layerNetValueDerivates.InternalArray, pp_i_j_k_Values = p_i_j_k_Values.InternalArray)
            {
                var layerNetValueDerivatesPtr = layerNetValueDerivates.ToPtr(pLayerNetValueDerivates);
                var p_i_j_k_ValuesPtr = p_i_j_k_Values.ToPtr(pp_i_j_k_Values);

                for (int kValueIndex = 0; kValueIndex < p_i_j_k_Values.Size; kValueIndex++)
                {
                    // i: iLayerIndex, iValueIndex
                    // j: jLayerIndex, jValueIndex
                    // k: kLayerIndex, kValueIndex
                    float netDeriv_k = layerNetValueDerivatesPtr[kValueIndex];
                    float sum = 0.0f;

                    var upperInfos_k = data.InputLayerInfos[kLayerIndex];
                    foreach (var upperNonInputLayerInfo in upperInfos_k)
                    {
                        Debug.Assert(upperNonInputLayerInfo.Weights != null);

                        int lLayerIndex = upperNonInputLayerInfo.Index;
                        var p_i_j_l_Values = valueRelatedPBuffs[lLayerIndex].ToManaged();
                        var weights = upperNonInputLayerInfo.Weights.ToManaged2();

                        Debug.Assert(p_i_j_l_Values.Size == weights.Size1);
                        Debug.Assert(weights.Size2 == p_i_j_k_Values.Size);

                        fixed (float* pp_i_j_l = p_i_j_l_Values.InternalArray, pWeights = weights.InternalArray)
                        {
                            var p_i_j_l_ValuesPtr = p_i_j_l_Values.ToPtr(pp_i_j_l);
                            var weightsPtr = weights.ToPtr2(pWeights);

                            for (int lValueIndex = 0; lValueIndex < p_i_j_l_Values.Size; lValueIndex++)
                            {
                                // l: lLayerIndex, lValueIndex
                                sum += weightsPtr[lValueIndex, kValueIndex] * p_i_j_l_ValuesPtr[lValueIndex];
                            }
                        }
                    }

                    if (data.ILayerIndex == kLayerIndex && data.IValueIndex == kValueIndex)
                    {
                        if (inputs != null)
                        {
                            // Weighted connection: add the j-th input value.
                            var inputsPtr = inputs.ToPtr(pInputs);
                            sum += inputsPtr[data.JValueIndex];
                        }
                        else
                        {
                            // Biased connection: the "input" is the constant 1.
                            Debug.Assert(data.JValueIndex == -1);
                            sum += 1.0f;
                        }
                    }

                    p_i_j_k_ValuesPtr[kValueIndex] = netDeriv_k * sum;

                    if (computeGradient)
                    {
                        var outputsPtr = outputs.ToPtr(pOutputs);
                        var desiredOutputsPtr = desiredOutputs.ToPtr(pDesiredOutputs);
                        gradient += (desiredOutputsPtr[kValueIndex] - outputsPtr[kValueIndex]) * p_i_j_k_ValuesPtr[kValueIndex];
                    }
                }
            }
        }
    }

    if (gradient != 0.0f) SetGradientsRTLR(data, gradient);
}
private void Reset(Marshaled<IDeviceArray[]> p)
{
    foreach (var da in p.Instance()) mlp.Adapter.VectorUtils.Zero(da);
}
// Variant of the cached gradient computation used with the data-carried
// layer infos: the RTLR layer topology is materialized once per closure and
// stored in RTLRComputationData.
private void ComputeGradients(int computationIndex, Marshaled<IDeviceArray[]> valueRelatedPBuffs, int iLayerIndex, int iValueIndex, int jLayerIndex, int jValueIndex, IDeviceArray outputs, IDeviceArray desiredOutputs)
{
#if DEBUG
    int outputLayerIndex = valueRelatedPBuffs.Instance().Length - 1;
    Debug.Assert(outputLayerIndex == mlp.Layers.Count - 2);
    Debug.Assert(outputLayerIndex == mlp.Layers[mlp.Layers.Count - 1].Index - 1);
#endif

    if (codes.Count > computationIndex)
    {
        var code = codes[computationIndex];
        if (code != null) code(valueRelatedPBuffs, outputs, desiredOutputs);
    }
    else
    {
        codes.EnsureSize(computationIndex + 1);

        Action<Marshaled<IDeviceArray[]>, IDeviceArray, IDeviceArray> code = null;
        bool forBias = jValueIndex == -1;
        var dataM = mlp.AsMarshaled(new RTLRComputationData());
        var data = dataM.ManagedObject;
        int iLayerIndexN = iLayerIndex + 1;
        var iLayer = mlp.Layers[iLayerIndexN];

        data.ILayerIndex = iLayerIndex;
        data.IValueIndex = iValueIndex;
        data.JLayerIndex = jLayerIndex;
        data.JValueIndex = jValueIndex;

        if (forBias)
        {
            Debug.Assert(jLayerIndex == 0);
            data.BiasGradients = mlp.GetBiasGradients(iLayerIndexN);
            data.BiasGradientSums = mlp.GetBiasGradientSums(iLayerIndexN);
        }
        else
        {
            Debug.Assert(jLayerIndex > 0);
            var inputLayerOfILayer = iLayer.Layer.GetInputLayer(jLayerIndex - 1);
            var inputLayerOfILayerIndex = mlp.GetLayerIndex(inputLayerOfILayer);
            var weightKey = Tuple.Create(inputLayerOfILayerIndex, iLayerIndexN);
            data.Inputs = () => mlp.GetNetValues(inputLayerOfILayerIndex);
            data.Gradients = mlp.GetGradients(weightKey);
            data.GradientSums = mlp.GetGradientSums(weightKey);
        }

        Debug.Assert(!(data.BiasGradients == null && data.BiasGradientSums == null && data.Gradients == null && data.GradientSums == null));

        data.NetValueDerivates = netValueDerivates;
        // For every non-input layer, collect its non-input input layers
        // (the recurrent set U) together with the connecting weights.
        data.InputLayerInfos =
            (from lidx in Enumerable.Range(1, mlp.Layers.Count - 1)
             let layer = mlp.Layers[lidx].Layer
             select
                (from inputLayer in layer.GetInputLayers()
                 where inputLayer != mlp.Layers[0].Layer
                 let iidx = mlp.GetLayerIndex(inputLayer)
                 select new RTLRLayerInfo
                 {
                     Index = iidx - 1,
                     Size = inputLayer.Size,
                     Weights = mlp.Weights[Tuple.Create(iidx, lidx)]
                 }).ToArray()).ToArray();

        code = (p, os, dos) => mlp.Adapter.ComputeActivation.ComputeGradientsRTLR(dataM, p, os, dos);
        codes[computationIndex] = code;
        code(valueRelatedPBuffs, outputs, desiredOutputs);
    }
}