public abstract void SingleOutputBackward(NdArray y, NdArray x);
public static void SingleOutputBackward(NdArray<Real> y, NdArray<Real> x, bool train, NdArray<Real> gamma, NdArray<Real> beta, NdArray<Real> avgMean, NdArray<Real> avgVar, Real[] std, Real[] xhat, int channelSize)
{
    beta.InitGrad();
    gamma.InitGrad();

    int dataSize = x.Length / channelSize;

    for (int i = 0; i < channelSize; i++)
    {
        for (int b = 0; b < x.BatchCount; b++)
        {
            for (int location = 0; location < dataSize; location++)
            {
                int index = b * y.Length + i * dataSize + location;
                beta.Grad[i] += y.Grad[index];
                gamma.Grad[i] += y.Grad[index] * xhat[index];
            }
        }
    }

    if (train)
    {
        // Training mode
        for (int i = 0; i < channelSize; i++)
        {
            Real gs = gamma.Data[i] / std[i];

            for (int b = 0; b < y.BatchCount; b++)
            {
                for (int location = 0; location < dataSize; location++)
                {
                    int index = b * y.Length + i * dataSize + location;
                    Real val = (xhat[index] * gamma.Grad[i] + beta.Grad[i]) / (y.BatchCount * dataSize);
                    x.Grad[index] += gs * (y.Grad[index] - val);
                }
            }
        }
    }
    else
    {
        // Inference mode
        for (int i = 0; i < channelSize; i++)
        {
            Real gs = gamma.Data[i] / std[i];
            avgMean.Grad[i] = -gs * beta.Grad[i];
            avgVar.Grad[i] = -0.5f * gamma.Data[i] / avgVar.Data[i] * gamma.Grad[i];

            for (int b = 0; b < y.BatchCount; b++)
            {
                for (int location = 0; location < dataSize; location++)
                {
                    int index = b * y.Length + i * dataSize + location;
                    x.Grad[index] += gs * y.Grad[index];
                }
            }
        }
    }
}
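For reference, the training branch above implements the standard batch normalization gradients; writing $g_y$ for y.Grad, $B$ for the batch count, and $N$ for dataSize:

$$\frac{\partial L}{\partial \beta_c} = \sum_{b,\,l} g_y,\qquad \frac{\partial L}{\partial \gamma_c} = \sum_{b,\,l} g_y\,\hat{x},\qquad \frac{\partial L}{\partial x} = \frac{\gamma_c}{\sigma_c}\left(g_y - \frac{\hat{x}\,\partial L/\partial\gamma_c + \partial L/\partial\beta_c}{B\,N}\right)$$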
private NdArray ForwardCpu(NdArray input)
{
    int outputHeight = _coverAll ?
        (int)Math.Floor((input.Shape[1] - this._kHeight + this._padY * 2.0 + this._strideY - 1.0) / this._strideY) + 1 :
        (int)Math.Floor((input.Shape[1] - this._kHeight + this._padY * 2.0) / this._strideY) + 1;
    int outputWidth = _coverAll ?
        (int)Math.Floor((input.Shape[2] - this._kWidth + this._padX * 2.0 + this._strideX - 1.0) / this._strideX) + 1 :
        (int)Math.Floor((input.Shape[2] - this._kWidth + this._padX * 2.0) / this._strideX) + 1;

    int[] outputIndices = new int[input.Shape[0] * outputHeight * outputWidth * input.BatchCount];

    for (int i = 0; i < outputIndices.Length; i++)
    {
        outputIndices[i] = -1;
    }

    for (int b = 0; b < input.BatchCount; b++)
    {
        int outBatchOffset = b * input.Shape[0] * outputHeight * outputWidth;
        int inBatchOffset = b * input.Length;

        for (int i = 0; i < input.Shape[0]; i++)
        {
            int outChOffset = outBatchOffset + i * outputHeight * outputWidth;
            int inChOffset = inBatchOffset + i * input.Shape[1] * input.Shape[2];

            for (int y = 0; y < outputHeight; y++)
            {
                int inIndexY = y * _strideY - _padY;
                int dyLimit = this._kHeight < input.Shape[1] - inIndexY ? this._kHeight : input.Shape[1] - inIndexY;
                int dyStart = inIndexY < 0 ? -inIndexY : 0;

                for (int x = 0; x < outputWidth; x++)
                {
                    int inIndexX = x * _strideX - _padX;
                    int dxLimit = this._kWidth < input.Shape[2] - inIndexX ? this._kWidth : input.Shape[2] - inIndexX;
                    int dxStart = inIndexX < 0 ? -inIndexX : 0;

                    int inBaseIndex = inChOffset + inIndexY * input.Shape[2] + inIndexX;
                    int outIndex = outChOffset + y * outputWidth + x;

                    Real maxVal = float.NegativeInfinity;
                    outputIndices[outIndex] = -1;

                    for (int dy = dyStart; dy < dyLimit; dy++)
                    {
                        for (int dx = dxStart; dx < dxLimit; dx++)
                        {
                            int inputIndex = inBaseIndex + dy * input.Shape[2] + dx;

                            if (maxVal < input.Data[inputIndex])
                            {
                                maxVal = input.Data[inputIndex];
                                outputIndices[outIndex] = inputIndex;
                            }
                        }
                    }
                }
            }
        }
    }

    return GetForwardResult(input, outputIndices, outputWidth, outputHeight);
}
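The output size computed above follows the usual pooling arithmetic; coverAll widens the numerator by stride − 1 so the window also covers the trailing edge. For the height (and analogously for the width):

$$H_{out} = \left\lfloor \frac{H_{in} - k_H + 2p_Y + (s_Y - 1)\,[\text{coverAll}]}{s_Y} \right\rfloor + 1$$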
public GradientClippingParameter(NdArray functionParameter, GradientClipping optimizer) : base(functionParameter)
{
    this.optimizer = optimizer;
}
protected abstract void MultiOutputBackward(NdArray[] ys, NdArray x);
public static void NeedPreviousBackwardCpu(this ICompressibleActivation<Real> compressibleActivation, NdArray<Real> y, NdArray<Real> x)
{
    for (int i = 0; i < x.Grad.Length; i++)
    {
        x.Grad[i] += compressibleActivation.BackwardActivate(y.Grad[i], y.Data[i], x.Data[i]);
    }
}
public static NdArray<Real> SingleInputForward(NdArray<Real> x, IFunction<Real> upward, IFunction<Real> lateral, List<Real[][]> paramList, List<NdArray<Real>> hPrevParams, ref NdArray<Real> hParam, ref Real[] lcPrev, int outputCount, IFunction<Real> lstm)
{
    int outputDataSize = x.BatchCount * outputCount;

    NdArray<Real> lstmIn = upward.Forward(x)[0];

    if (hParam == null)
    {
        lcPrev = new Real[outputDataSize];
    }
    else
    {
        NdArray<Real> hPrevParam = hParam.Clone();
        if (hPrevParam.Grad != null) hPrevParam.InitGrad();

        lstmIn += lateral.Forward(hPrevParam)[0];
        hPrevParams.Add(hPrevParam);
    }

    // 0:cPrev 1:a 2:i 3:f 4:o 5:c
    Real[][] param = { lcPrev, new Real[outputDataSize], new Real[outputDataSize], new Real[outputDataSize], new Real[outputDataSize], new Real[outputDataSize] };

    Real[] lhParam = new Real[outputDataSize];

    int index = 0;

    for (int outIndex = 0; outIndex < lhParam.Length; outIndex++)
    {
        param[1][outIndex] = Math.Tanh(lstmIn.Data[index++]);
        param[2][outIndex] = Sigmoid(lstmIn.Data[index++]);
        param[3][outIndex] = Sigmoid(lstmIn.Data[index++]);
        param[4][outIndex] = Sigmoid(lstmIn.Data[index++]);

        param[5][outIndex] = param[1][outIndex] * param[2][outIndex] + param[3][outIndex] * param[0][outIndex];

        lhParam[outIndex] = param[4][outIndex] * Math.Tanh(param[5][outIndex]);
    }

    paramList.Add(param); // Stored separately so it survives until Backward

    lcPrev = param[5];
    hParam = new NdArray<Real>(lhParam, new[] { outputCount }, x.BatchCount, lstm);

    return hParam;
}
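The param slots 0–5 above correspond to the standard LSTM cell; with $z$ the summed upward and lateral pre-activations, the loop computes:

$$a = \tanh(z_a),\quad i = \sigma(z_i),\quad f = \sigma(z_f),\quad o = \sigma(z_o),\qquad c = a \odot i + f \odot c_{prev},\qquad h = o \odot \tanh(c)$$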
public static T Train<T, LabelType>(FunctionStack<T> functionStack, NdArray<T> input, NdArray<LabelType> teach, LossFunction<T, LabelType> lossFunction, Optimizer<T> optimizer = null) where T : unmanaged, IComparable<T> where LabelType : unmanaged, IComparable<LabelType>
{
    optimizer?.SetUp(functionStack);

    // Run the forward pass and record the resulting loss
    NdArray<T> result = functionStack.Forward(input)[0];
    T loss = lossFunction.Evaluate(result, teach);

    // Run the batched backward pass
    functionStack.Backward(result);

    // Update the parameters
    optimizer?.Update();

    return loss;
}
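A minimal usage sketch of the helper above. The enclosing Trainer class name and the Linear, SoftmaxCrossEntropy, and SGD types are assumptions for illustration, not confirmed API:

// Hypothetical names: Trainer, Linear, SoftmaxCrossEntropy, SGD.
FunctionStack<float> nn = new FunctionStack<float>(new Linear<float>(784, 10));
NdArray<float> input = NdArray.FromArrays(inputBatch); // float[][] mini-batch
NdArray<int> teach = NdArray.FromArrays(labelBatch);   // int[][] labels
float loss = Trainer.Train(nn, input, teach, new SoftmaxCrossEntropy<float>(), new SGD<float>());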
// Accuracy measurement
public static T Accuracy<T>(FunctionStack<T> functionStack, T[][] x, int[][] y) where T : unmanaged, IComparable<T>
{
    return Accuracy(functionStack, NdArray.FromArrays(x), NdArray.FromArrays(y));
}
// Backward
public override void Backward(params NdArray[] ys)
{
    NdArray.Backward(ys[0]);
}
// Runs the training process on a batch
public static T Train<T, LabelType>(FunctionStack<T> functionStack, T[][] input, LabelType[][] teach, LossFunction<T, LabelType> lossFunction, Optimizer<T> optimizer = null) where T : unmanaged, IComparable<T> where LabelType : unmanaged, IComparable<LabelType>
{
    return Train(functionStack, NdArray.FromArrays(input), NdArray.FromArrays(teach), lossFunction, optimizer);
}
public AdaGradParameter(NdArray functionParameter, AdaGrad optimizer) : base(functionParameter)
{
    this.h = new Real[functionParameter.Data.Length];
    this.optimizer = optimizer;
}
private NdArray ForwardCpu(NdArray x)
{
    int dataSize = x.Length / ChannelSize;

    // Obtain the parameters used for the computation
    if (this.IsTrain)
    {
        // Set the Mean and Variance members
        this.Variance = new Real[this.ChannelSize];
        this.Mean = new Real[this.ChannelSize];

        for (int i = 0; i < this.ChannelSize; i++)
        {
            for (int b = 0; b < x.BatchCount; b++)
            {
                for (int location = 0; location < dataSize; location++)
                {
                    this.Mean[i] += x.Data[b * x.Length + i * dataSize + location];
                }
            }

            this.Mean[i] /= x.BatchCount * dataSize;

            for (int b = 0; b < x.BatchCount; b++)
            {
                for (int location = 0; location < dataSize; location++)
                {
                    Real diff = x.Data[b * x.Length + i * dataSize + location] - this.Mean[i];
                    this.Variance[i] += diff * diff;
                }
            }

            this.Variance[i] = this.Variance[i] / (x.BatchCount * dataSize) + this.Eps;
        }
    }
    else
    {
        this.Mean = this.AvgMean.Data;
        this.Variance = this.AvgVar.Data;
    }

    this.Std = new Real[this.ChannelSize];
    for (int i = 0; i < this.Std.Length; i++)
    {
        this.Std[i] = Math.Sqrt(this.Variance[i]);
    }

    // Compute the result
    this.Xhat = new Real[x.Data.Length];
    Real[] y = new Real[x.Data.Length];

    for (int i = 0; i < this.ChannelSize; i++)
    {
        for (int b = 0; b < x.BatchCount; b++)
        {
            for (int location = 0; location < dataSize; location++)
            {
                int index = b * x.Length + i * dataSize + location;
                this.Xhat[index] = (x.Data[index] - this.Mean[i]) / this.Std[i];
                y[index] = this.Gamma.Data[i] * this.Xhat[index] + this.Beta.Data[i];
            }
        }
    }

    // Update the running parameters
    if (this.IsTrain)
    {
        int m = x.BatchCount;
        Real adjust = m / Math.Max(m - 1.0, 1.0); // unbiased estimation

        for (int i = 0; i < this.AvgMean.Data.Length; i++)
        {
            this.AvgMean.Data[i] *= this.Decay;
            this.Mean[i] *= 1 - this.Decay; // reuse buffer as a temporary
            this.AvgMean.Data[i] += this.Mean[i];

            this.AvgVar.Data[i] *= this.Decay;
            this.Variance[i] *= (1 - this.Decay) * adjust; // reuse buffer as a temporary
            this.AvgVar.Data[i] += this.Variance[i];
        }
    }

    return NdArray.Convert(y, x.Shape, x.BatchCount, this);
}
protected override void MultiOutputBackward(NdArray[] ys, NdArray x) { }
private TestDataSet<T> GetRandomData(int batchCount, Func<int> getIndexFunc)
{
    T[] data = new T[NdArray.ShapeToLength(Shape) * batchCount];
    int[] label = new int[batchCount];

    for (int i = 0; i < batchCount; i++)
    {
        int index = getIndexFunc();

        T[] labeledData = Get(index);
        Array.Copy(labeledData, 0, data, i * labeledData.Length, labeledData.Length);

        label[i] = DataLabel[index];
    }

    TestDataSet<T> result = new TestDataSet<T>(NdArray.Convert(data, Shape, batchCount), NdArray.Convert(label, new[] { 1 }, batchCount));

    return result;
}
public static T Accuracy<T>(FunctionStack<T> functionStack, NdArray<T> x, NdArray<int> y) where T : unmanaged, IComparable<T>
{
    return Accuracy(x, y, functionStack.Predict(x)[0]);
}
public static NdArray<Real> NeedPreviousForwardCpu(this ICompressibleActivation<Real> compressibleActivation, NdArray<Real> x)
{
    Real[] y = new Real[x.Data.Length];

    for (int i = 0; i < y.Length; i++)
    {
        y[i] = compressibleActivation.ForwardActivate(x.Data[i]);
    }

    return NdArray.Convert(y, x.Shape, x.BatchCount, compressibleActivation);
}
// Accuracy measurement
public static T Accuracy<T>(FunctionStack<T> functionStack, T[][] x, int[][] y, LossFunction<T, int> lossFunction, out T loss) where T : unmanaged, IComparable<T>
{
    return Accuracy(functionStack, NdArray.FromArrays(x), NdArray.FromArrays(y), lossFunction, out loss);
}
public static Real[] GetActivatedgy(this ICompressibleActivation<Real> compressibleActivation, NdArray<Real> y, NdArray<Real> x)
{
    Real[] activatedgy = new Real[y.Grad.Length];

    for (int i = 0; i < activatedgy.Length; i++)
    {
        activatedgy[i] = compressibleActivation.BackwardActivate(y.Grad[i], y.Data[i], x.Data[i]);
    }

    return activatedgy;
}
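ForwardActivate and BackwardActivate are supplied by the concrete activation; a minimal ReLU-style sketch, assuming this is the intended shape of the ICompressibleActivation<Real> members:

// Hypothetical ReLU implementation of the two interface members.
public Real ForwardActivate(Real x)
{
    return x < 0 ? 0 : x; // max(0, x)
}

public Real BackwardActivate(Real gy, Real y, Real x)
{
    return y > 0 ? gy : 0; // pass the gradient only where the output was positive
}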
//精度測定 public static double Accuracy(FunctionStack functionStack, Array[] x, Array[] y) { return(Accuracy(functionStack, NdArray.FromArrays(x), NdArray.FromArrays(y))); }
public static void SingleOutputBackward(NdArray<Real> y, IFunction<Real> upward, IFunction<Real> lateral, List<Real[][]> paramLists, List<NdArray<Real>> hPrevParams, List<Real[][]> usedParamLists, List<NdArray<Real>> hUsedPrevParams, List<Real[]> gxPrevGrads, int outputCount, ActionOptional<Real> backward)
{
    Real[] gxPrevGrad = new Real[y.BatchCount * outputCount * 4];
    Real[] gcPrev = new Real[y.BatchCount * outputCount];

    // 0:cPrev 1:a 2:i 3:f 4:o 5:c
    Real[][] param = paramLists[paramLists.Count - 1];
    paramLists.RemoveAt(paramLists.Count - 1);
    usedParamLists.Add(param);

    int index = 0;

    for (int prevOutputIndex = 0; prevOutputIndex < gcPrev.Length; prevOutputIndex++)
    {
        Real co = Math.Tanh(param[5][prevOutputIndex]);

        gcPrev[prevOutputIndex] += y.Grad[prevOutputIndex] * param[4][prevOutputIndex] * GradTanh(co);
        gxPrevGrad[index++] = gcPrev[prevOutputIndex] * param[2][prevOutputIndex] * GradTanh(param[1][prevOutputIndex]);
        gxPrevGrad[index++] = gcPrev[prevOutputIndex] * param[1][prevOutputIndex] * GradSigmoid(param[2][prevOutputIndex]);
        gxPrevGrad[index++] = gcPrev[prevOutputIndex] * param[0][prevOutputIndex] * GradSigmoid(param[3][prevOutputIndex]);
        gxPrevGrad[index++] = y.Grad[prevOutputIndex] * co * GradSigmoid(param[4][prevOutputIndex]);
        gcPrev[prevOutputIndex] *= param[3][prevOutputIndex];
    }

    gxPrevGrads.Add(gxPrevGrad);

    if (hPrevParams.Count > 0)
    {
        // The linear Backward only uses gxPrev.Grad, so gxPrev.Data stays empty
        NdArray<Real> gxPrev = new NdArray<Real>(new[] { outputCount * 4 }, y.BatchCount);
        gxPrev.Grad = gxPrevGrad;
        lateral.Backward(gxPrev);

        NdArray<Real> hPrevParam = hPrevParams[hPrevParams.Count - 1];
        hPrevParams.RemoveAt(hPrevParams.Count - 1);
        hUsedPrevParams.Add(hPrevParam);

        // Backward for h
        backward(hPrevParam);

        // Once exhausted, put them back
        if (hPrevParams.Count == 0)
        {
            hPrevParams.AddRange(hUsedPrevParams);
            hUsedPrevParams.Clear();
        }
    }

    // The linear Backward only uses gy.Grad, so gy.Data stays empty
    NdArray<Real> gy = new NdArray<Real>(new[] { outputCount * 4 }, y.BatchCount);
    gy.Grad = gxPrevGrads[0];
    gxPrevGrads.RemoveAt(0);
    upward.Backward(gy);

    // Once exhausted, put them back
    if (paramLists.Count == 0)
    {
        paramLists.AddRange(usedParamLists);
        usedParamLists.Clear();
    }
}
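GradTanh and GradSigmoid are not shown in this excerpt; a plausible sketch, assuming (as the forward pass suggests) both receive already-activated values:

static Real GradSigmoid(Real x) { return x * (1 - x); } // x = Sigmoid(u)
static Real GradTanh(Real x) { return 1 - x * x; }      // x = Math.Tanh(u)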
// Runs the training process on a batch
public static Real Train(FunctionStack functionStack, Array[] input, Array[] teach, LossFunction lossFunction, bool isUpdate = true)
{
    return Train(functionStack, NdArray.FromArrays(input), NdArray.FromArrays(teach), lossFunction, isUpdate);
}
protected abstract NdArray[] SingleInputForward(NdArray x);
public MomentumSGDParameter(NdArray functionParameter, MomentumSGD optimizer) : base(functionParameter)
{
    this.v = new Real[functionParameter.Data.Length];
    this.optimizer = optimizer;
}
public static NdArray<Real> SingleInputForward(NdArray<Real> x, bool train, NdArray<Real> gamma, NdArray<Real> beta, NdArray<Real> avgMean, NdArray<Real> avgVar, ref Real n, bool finetune, ref Real decay, Real eps, out Real[] std, out Real[] xhat, int channelSize, IFunction<Real> batchNorm)
{
    Real[] Mean;
    Real[] Variance;

    if (finetune)
    {
        n++;
        decay = 1 - 1 / n;
    }

    int dataSize = x.Length / channelSize;

    // Obtain the parameters used for the computation
    if (train)
    {
        // Set the Mean and Variance members
        Variance = new Real[channelSize];
        Mean = new Real[channelSize];

        for (int i = 0; i < channelSize; i++)
        {
            for (int b = 0; b < x.BatchCount; b++)
            {
                for (int location = 0; location < dataSize; location++)
                {
                    Mean[i] += x.Data[b * x.Length + i * dataSize + location];
                }
            }

            Mean[i] /= x.BatchCount * dataSize;

            for (int b = 0; b < x.BatchCount; b++)
            {
                for (int location = 0; location < dataSize; location++)
                {
                    Real diff = x.Data[b * x.Length + i * dataSize + location] - Mean[i];
                    Variance[i] += diff * diff;
                }
            }

            Variance[i] /= x.BatchCount * dataSize;
        }
    }
    else
    {
        Mean = avgMean.Data;
        Variance = avgVar.Data;
    }

    std = new Real[channelSize];
    for (int i = 0; i < std.Length; i++)
    {
        std[i] = Math.Sqrt(Variance[i] + eps);
    }

    // Compute the result
    xhat = new Real[x.Data.Length];
    Real[] y = new Real[x.Data.Length];

    for (int b = 0; b < x.BatchCount; b++)
    {
        for (int i = 0; i < channelSize; i++)
        {
            for (int location = 0; location < dataSize; location++)
            {
                int index = b * x.Length + i * dataSize + location;
                xhat[index] = (x.Data[index] - Mean[i]) / std[i];
                y[index] = gamma.Data[i] * xhat[index] + beta.Data[i];
            }
        }
    }

    // Update the running parameters
    if (train)
    {
        Real adjust = x.BatchCount / Math.Max(x.BatchCount - 1, 1.0f); // unbiased estimation

        for (int i = 0; i < avgMean.Data.Length; i++)
        {
            avgMean.Data[i] *= decay;
            Mean[i] *= 1 - decay; // reuse buffer as a temporary
            avgMean.Data[i] += Mean[i];

            avgVar.Data[i] *= decay;
            Variance[i] *= (1 - decay) * adjust; // reuse buffer as a temporary
            avgVar.Data[i] += Variance[i];
        }
    }

    return NdArray.Convert(y, x.Shape, x.BatchCount, batchNorm);
}
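Both forward variants above compute, per channel $c$ with decay $d$ and batch size $m$:

$$\hat{x} = \frac{x - \mu_c}{\sqrt{\sigma_c^2 + \epsilon}},\qquad y = \gamma_c\,\hat{x} + \beta_c,\qquad \mu_{avg} \leftarrow d\,\mu_{avg} + (1 - d)\,\mu_c,\qquad \sigma^2_{avg} \leftarrow d\,\sigma^2_{avg} + (1 - d)\,\tfrac{m}{m-1}\,\sigma_c^2$$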
NdArray ForwardCpu(NdArray val)
{
    int[] resultShape;

    if (val.Shape.Length > this.Shape.Length)
    {
        // The input has more dimensions
        resultShape = val.Shape.ToArray();
        int offset = val.Shape.Length - this.Shape.Length;

        for (int i = offset; i < resultShape.Length; i++)
        {
            if (resultShape[i] == 1)
            {
                resultShape[i] = this.Shape[i - offset];
            }
#if DEBUG
            else if (this.Shape[i - offset] != 1 && resultShape[i] != this.Shape[i - offset])
            {
                throw new Exception("Incompatible shape combination");
            }
#endif
        }
    }
    else
    {
        // The specified shape has more dimensions
        resultShape = this.Shape.ToArray();
        int offset = this.Shape.Length - val.Shape.Length;

        for (int i = offset; i < resultShape.Length; i++)
        {
            if (resultShape[i] == 1)
            {
                resultShape[i] = val.Shape[i - offset];
            }
#if DEBUG
            else if (val.Shape[i - offset] != 1 && resultShape[i] != val.Shape[i - offset])
            {
                throw new Exception("Incompatible shape combination");
            }
#endif
        }
    }

    NdArray result = new NdArray(resultShape, val.BatchCount, this);
    int indexOffset = result.Shape.Length - val.Shape.Length;

    for (int batchCount = 0; batchCount < result.BatchCount; batchCount++)
    {
        for (int i = 0; i < result.Length; i++)
        {
            int[] baseIndex = result.GetDimensionsIndex(i);

            int tmpIndexLastIndex = val.Shape.Length - 1;
            int valIndex = batchCount * val.Length;
            int rankOffset = 1;

            for (int j = 0; j < val.Shape.Length; j++)
            {
                if (val.Shape[tmpIndexLastIndex] > 1)
                {
                    valIndex += baseIndex[tmpIndexLastIndex + indexOffset] * rankOffset;
                }

                rankOffset *= val.Shape[tmpIndexLastIndex--];
            }

            result.Data[batchCount * result.Length + i] = val.Data[valIndex];
        }
    }

    return result;
}
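A shape-level sketch of what the broadcast above produces; the Broadcast class name and constructor form are assumptions inferred from this method, not confirmed API:

// val has shape {3, 1}; broadcasting against a target shape {3, 4}
// repeats the single column four times, yielding a {3, 4} result.
NdArray val = new NdArray(new[] { 3, 1 });
NdArray result = new Broadcast(new[] { 3, 4 }).Forward(val)[0]; // hypothetical usage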
public BatchNormalization(int channelSize, T? decay = null, T? eps = null, bool useGamma = true, bool useBeta = true, int initialGamma = 1, int initialBeta = 0, int initialAvgMean = 0, int initialAvgVar = 1, bool train = true, bool finetune = false, string name = FUNCTION_NAME, string[] inputNames = null, string[] outputNames = null) : base(name, inputNames, outputNames)
{
    this.ChannelSize = channelSize;
    this.Train = train;
    this.Finetune = finetune;

    this.Gamma = new NdArray<T>(channelSize);
    this.Gamma.Name = this.Name + " Gamma";

    this.Beta = new NdArray<T>(channelSize);
    this.Beta.Name = this.Name + " Beta";

    int paramIndex = 0;
    int paramCount = 0;

    if (useGamma) paramCount++;
    if (useBeta) paramCount++;
    if (!train) paramCount += 2;

    this.Parameters = new NdArray<T>[paramCount];

    // Register the trainable parameters
    if (useGamma) this.Parameters[paramIndex++] = this.Gamma;
    if (useBeta) this.Parameters[paramIndex++] = this.Beta;

    this.AvgMean = new NdArray<T>(channelSize);
    this.AvgMean.Name = this.Name + " Mean";

    this.AvgVar = new NdArray<T>(channelSize);
    this.AvgVar.Name = this.Name + " Variance";

    this.Decay = decay ?? (TVal<T>)0.9;
    this.Eps = eps ?? (TVal<T>)2e-5;

    this.Gamma.Fill((TVal<T>)initialGamma);
    this.Beta.Fill((TVal<T>)initialBeta);
    this.AvgMean.Fill((TVal<T>)initialAvgMean);
    this.AvgVar.Fill((TVal<T>)initialAvgVar);

    // Not trained here; delegated to the optimizer
    if (!this.Train)
    {
        this.Parameters[paramIndex++] = this.AvgMean;
        this.Parameters[paramIndex] = this.AvgVar;
    }

    InitFunc(new StreamingContext());
}
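A minimal construction sketch following the signature above; the single-input Forward call is assumed to be the standard function entry point:

// Batch normalization over 32 channels with default decay (0.9) and eps (2e-5).
BatchNormalization<float> bn = new BatchNormalization<float>(32);
NdArray<float> y = bn.Forward(x)[0]; // x: NdArray<float> with 32 channels per sample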
public AdaDeltaParameter(NdArray functionParameter, AdaDelta optimizer) : base(functionParameter)
{
    this.msg = new Real[functionParameter.Data.Length];
    this.msdx = new Real[functionParameter.Data.Length];
    this.optimizer = optimizer;
}
public static void UpdateFunctionParameters(Real alpha, Real weightDecayRate, Real beta1, Real beta2, Real epsilon, Real eta, long updateCount, NdArray<Real> functionParameter, Real[] m, Real[] v)
{
    Real alphaT = AdamParameter.GetAlphaT(alpha, beta1, beta2, updateCount);

    for (int i = 0; i < functionParameter.Data.Length; i++)
    {
        Real grad = functionParameter.Grad[i];

        // Exponential moving averages of the gradient and the squared gradient
        m[i] += (1 - beta1) * (grad - m[i]);
        v[i] += (1 - beta2) * (grad * grad - v[i]);

        Real step = alphaT / (Math.Sqrt(v[i]) + epsilon);

        // Parameter update with decoupled weight decay
        functionParameter.Data[i] -= eta * (step * m[i] + weightDecayRate * functionParameter.Data[i]);
    }
}
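GetAlphaT is the bias-corrected step size; a sketch consistent with the standard Adam formulation (the actual body is not shown in this excerpt):

// alphaT = alpha * sqrt(1 - beta2^t) / (1 - beta1^t)
public static Real GetAlphaT(Real alpha, Real beta1, Real beta2, long updateCount)
{
    Real fix1 = 1 - Math.Pow(beta1, updateCount);
    Real fix2 = 1 - Math.Pow(beta2, updateCount);
    return alpha * Math.Sqrt(fix2) / fix1;
}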
public abstract NdArray SingleInputForward(NdArray x);