internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    var provider = Control.LinearAlgebraProvider;

    using (var weightsCpu = this.weights.OnCpu()) {
        var bottomData = (DenseVector)bottom[0].Data;
        var topDiff = (DenseVector)top[0].Diff;
        var weightsData = (DenseVector)weightsCpu.Data;

        if (GetPropagateDownForParameter(0)) {
            // Gradient with respect to the weights: topDiff^T (n x m) * bottomData (m x k).
            var weightsDiff = (DenseVector)weightsCpu.Diff;
            provider.MatrixMultiplyWithUpdate(Transpose.Transpose, Transpose.DontTranspose, 1f, topDiff.Values, m, n, bottomData.Values, m, k, 0f, weightsDiff.Values);
        }

        if (this.Parameters.UseBias && GetPropagateDownForParameter(1)) {
            using (var biasCpu = this.bias.OnCpu())
            using (var biasMultiplierCpu = this.biasMultiplier.OnCpu()) {
                var biasDiff = (DenseVector)biasCpu.Diff;
                var biasMultiplierData = (DenseVector)biasMultiplierCpu.Data;

                // Gradient with respect to the bias: topDiff^T (n x m) * biasMultiplier (m x 1).
                provider.MatrixMultiplyWithUpdate(Transpose.Transpose, Transpose.DontTranspose, 1f, topDiff.Values, m, n, biasMultiplierData.Values, m, 1, 0f, biasDiff.Values);
            }
        }

        if (propagateDown[0]) {
            // Gradient with respect to the bottom blob: topDiff (m x n) * weights (n x k).
            var bottomDiff = (DenseVector)bottom[0].Diff;
            provider.MatrixMultiplyWithUpdate(Transpose.DontTranspose, Transpose.DontTranspose, 1f, topDiff.Values, m, n, weightsData.Values, n, k, 0f, bottomDiff.Values);
        }
    }
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var topData = top[0].Data;
    var power = this.Parameters.Power;
    var shift = this.Parameters.Shift;
    var scale = this.Parameters.Scale;

    // Special case where we can ignore the input: scale or power is zero.
    if (this.Parameters.DiffScale == 0f) {
        double value = (power == 0f) ? 1.0d : Math.Pow(shift, power);
        topData.Map(v => value, topData, Zeros.Include);
        return 0;
    }

    // TODO: Math.Pow can be numerically unstable here; consider a more stable implementation.
    var bottomData = bottom[0].Data;
    if (power != 1) {
        bottomData.Map(v => Math.Pow(v * scale + shift, power), topData, Zeros.Include);
    } else {
        bottomData.Map(v => v * scale + shift, topData, Zeros.Include);
    }
    return 0;
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    var topDiff = top[0].Diff;
    var bottomDiff = bottom[0].Diff;
    var topData = top[0].Data;

    int num = bottom[0].Num;
    int dim = bottom[0].Count / num;

    // Copy gradients to the bottom layer.
    topDiff.CopyTo(bottomDiff);

    for (int n = 0; n < num; n++) {
        int offset = n * dim;

        // REMARK: Numerically unstable dot implementation.
        double scale = 0;
        for (int i = 0; i < dim; i++) {
            scale += topDiff[offset + i] * topData[offset + i];
        }

        for (int i = 0; i < dim; i++) {
            bottomDiff[offset + i] = (topDiff[offset + i] - scale) * topData[offset + i];
        }
    }
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var provider = Control.LinearAlgebraProvider;

    using (var weightsCpu = this.weights.OnCpu()) {
        var bottomData = (DenseVector)bottom[0].Data;
        var topData = (DenseVector)top[0].Data;
        var weightsData = (DenseVector)weightsCpu.Data;

        // top = bottom * weights^T  (bottom is m x k, weights is n x k, top is m x n).
        provider.MatrixMultiplyWithUpdate(Transpose.DontTranspose, Transpose.Transpose, 1f, bottomData.Values, m, k, weightsData.Values, n, k, 0f, topData.Values);

        if (Parameters.UseBias) {
            using (var biasCpu = this.bias.OnCpu())
            using (var biasMultiplierCpu = this.biasMultiplier.OnCpu()) {
                var biasData = (DenseVector)biasCpu.Data;
                var biasMultiplierData = (DenseVector)biasMultiplierCpu.Data;

                // top += biasMultiplier * bias  (biasMultiplier is m x 1, bias is 1 x n).
                provider.MatrixMultiplyWithUpdate(Transpose.DontTranspose, Transpose.DontTranspose, 1f, biasMultiplierData.Values, m, 1, biasData.Values, 1, n, 1f, topData.Values);
            }
        }
    }
    return 0;
}
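// Illustrative sketch (standalone, not part of the layer API): the same fully connected forward pass
// written with plain loops over double[] arrays. Here m is taken to be the batch size, k the number of
// inputs per sample and n the number of outputs per sample, which is an assumption based on how the
// GEMM above is called; weights are assumed to be stored row-major as an n x k matrix.
static double[] InnerProductReference(double[] bottom, double[] weights, double[] bias, int m, int k, int n) {
    var top = new double[m * n];
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            double sum = bias != null ? bias[col] : 0.0;
            for (int i = 0; i < k; i++) {
                sum += bottom[row * k + i] * weights[col * k + i];
            }
            top[row * n + col] = sum;
        }
    }
    return top;
}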
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    if (propagateDown[1]) {
        throw new NotSupportedException("SoftmaxLossLayer cannot back-propagate to label inputs.");
    }

    if (propagateDown[0]) {
        var bottomDiff = bottom[0].Diff;
        var labels = bottom[1].Data;

        int num = bottom[0].Num;
        int dim = bottom[0].Count / num;

        // Start from the softmax probabilities, then subtract 1 at the target class:
        // dE/dx_i = p_i - 1{i == label}.
        using (var probabilityCpu = probability.OnCpu()) {
            probabilityCpu.Data.CopyTo(bottomDiff);
        }

        for (int i = 0; i < num; i++) {
            bottomDiff[i * dim + (int)labels[i]] -= 1;
        }

        // Scale down the gradient by the batch size.
        double scale = 1f / num;
        bottomDiff.Map(v => v * scale, bottomDiff, Zeros.Include);
    }
}
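// Illustrative sketch (standalone): the gradient used above on a single 3-class sample. Starting from
// the softmax probabilities and subtracting 1 at the target class gives a vector that sums to zero,
// i.e. probability mass is shifted toward the correct class. The values are made up for illustration.
static void SoftmaxLossGradientDemo() {
    double[] p = { 0.7, 0.2, 0.1 };   // softmax output for one sample
    int label = 2;

    double[] grad = (double[])p.Clone();
    grad[label] -= 1.0;               // p - onehot(label)

    Console.WriteLine(string.Join(", ", grad));   // approximately 0.7, 0.2, -0.9
}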
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    int num = bottom[0].Num;
    int channels = bottom[0].Channels;
    int height = bottom[0].Height;
    int width = bottom[0].Width;

    Size padding = this.Parameters.Padding;
    Size stride = this.Parameters.Stride;
    Size kernel = this.Parameters.Kernel;

    // Zero the output
    topData.Map(v => 0, result: topData);

    // Main loop
    int bottomOffset = 0;
    int topOffset = 0;
    for (int n = 0; n < num; n++) {
        for (int c = 0; c < channels; c++) {
            for (int ph = 0; ph < Pooled.Height; ph++) {
                for (int pw = 0; pw < Pooled.Width; pw++) {
                    int hstart = ph * stride.Height - padding.Height;
                    int wstart = pw * stride.Width - padding.Width;
                    int hend = Math.Min(hstart + kernel.Height, height + padding.Height);
                    int wend = Math.Min(wstart + kernel.Width, width + padding.Width);
                    int poolSize = (hend - hstart) * (wend - wstart);

                    hstart = Math.Max(hstart, 0);
                    wstart = Math.Max(wstart, 0);
                    hend = Math.Min(hend, height);
                    wend = Math.Min(wend, width);

                    for (int h = hstart; h < hend; h++) {
                        for (int w = wstart; w < wend; w++) {
                            topData[topOffset + ph * Pooled.Width + pw] += bottomData[bottomOffset + h * width + w];
                        }
                    }
                    topData[topOffset + ph * Pooled.Width + pw] /= poolSize;
                }
            }
            bottomOffset += bottom[0].Offset(0, 1);
            topOffset += top[0].Offset(0, 1);
        }
    }
    return 0;
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    // Numerically stable softplus: log(1 + exp(x)) = max(x, 0) + log(1 + exp(-|x|)).
    bottomData.MapIndexed((i, v) => (v > 0) ? v + Math.Log(1.0d + Math.Exp(-v)) : Math.Log(1.0d + Math.Exp(v)), topData, Zeros.Include);

    return 0;
}
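// Illustrative sketch (standalone): why the branch above matters. Evaluating log(1 + exp(x)) directly
// overflows for large x, while the rearranged form stays finite and gives the same value.
static void SoftplusStabilityDemo() {
    double x = 1000.0;
    double naive = Math.Log(1.0 + Math.Exp(x));                                                // Exp(1000) overflows -> Infinity
    double stable = x > 0 ? x + Math.Log(1.0 + Math.Exp(-x)) : Math.Log(1.0 + Math.Exp(x));    // 1000
    Console.WriteLine($"naive = {naive}, stable = {stable}");
}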
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    var bottomDiff = bottom[0].Diff;
    var topDiff = top[0].Diff;

    int num = bottom[0].Num;
    int channels = bottom[0].Channels;
    int height = bottom[0].Height;
    int width = bottom[0].Width;

    Size padding = this.Parameters.Padding;
    Size stride = this.Parameters.Stride;
    Size kernel = this.Parameters.Kernel;

    // Zero the output
    bottomDiff.Map(v => 0, result: bottomDiff);

    // Main loop
    int bottomOffset = 0;
    int topOffset = 0;
    for (int n = 0; n < num; n++) {
        for (int c = 0; c < channels; c++) {
            for (int ph = 0; ph < Pooled.Height; ph++) {
                for (int pw = 0; pw < Pooled.Width; pw++) {
                    int hstart = ph * stride.Height - padding.Height;
                    int wstart = pw * stride.Width - padding.Width;
                    int hend = Math.Min(hstart + kernel.Height, height + padding.Height);
                    int wend = Math.Min(wstart + kernel.Width, width + padding.Width);
                    int poolSize = (hend - hstart) * (wend - wstart);

                    hstart = Math.Max(hstart, 0);
                    wstart = Math.Max(wstart, 0);
                    hend = Math.Min(hend, height);
                    wend = Math.Min(wend, width);

                    int pos = topOffset + ph * Pooled.Width + pw;
                    for (int h = hstart; h < hend; h++) {
                        for (int w = wstart; w < wend; w++) {
                            bottomDiff[bottomOffset + h * width + w] += topDiff[pos] / poolSize;
                        }
                    }
                }
            }
            bottomOffset += bottom[0].Offset(0, 1);
            topOffset += top[0].Offset(0, 1);
        }
    }
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;
    var slope = this.Parameters.NegativeSlope;

    // Leaky ReLU: f(x) = x for x > 0, slope * x otherwise.
    bottomData.MapIndexed((i, v) => v > 0 ? v : v * slope, topData, Zeros.Include);

    return 0;
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    if (this.Phase == PhaseType.Train) {
        return ForwardTrainCpu(bottom, top);
    } else {
        return ForwardTestCpu(bottom, top);
    }
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    if (propagateDown[0]) {
        var topData = top[0].Data;
        var topDiff = top[0].Diff;
        var bottomDiff = bottom[0].Diff;

        // dE/dx = dE/dy * y * (1 - y), where y = sigmoid(x).
        topData.MapIndexed((i, v) => topDiff[i] * v * (1.0d - v), bottomDiff, Zeros.Include);
    }
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    var bottomData = bottom[0].Data;
    var topDiff = top[0].Diff;
    var bottomDiff = bottom[0].Diff;

    // dE/dx = dE/dy * sigmoid(x); the exponent is clamped at Threshold to avoid overflow.
    bottomData.MapIndexed((i, v) => {
        var expVal = Math.Exp(Math.Min(v, Threshold));
        return topDiff[i] * expVal / (expVal + 1.0d);
    }, bottomDiff, Zeros.Include);
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    // tanh(x) = (exp(2x) - 1) / (exp(2x) + 1).
    bottomData.MapIndexed((i, v) => {
        var exp2x = Math.Exp(2 * v);
        return (exp2x - 1) / (exp2x + 1);
    }, topData, Zeros.Include);

    return 0;
}
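// Illustrative sketch (standalone, not the layer's implementation): for large positive inputs,
// Math.Exp(2 * x) overflows to Infinity and the ratio above becomes NaN, whereas Math.Tanh saturates
// cleanly at 1. A drop-in alternative would be to map v => Math.Tanh(v) instead.
static void TanhOverflowDemo() {
    double x = 400.0;
    double exp2x = Math.Exp(2 * x);              // Infinity
    double viaExp = (exp2x - 1) / (exp2x + 1);   // NaN (Infinity / Infinity)
    double viaTanh = Math.Tanh(x);               // 1.0
    Console.WriteLine($"viaExp = {viaExp}, viaTanh = {viaTanh}");
}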
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;
    var threshold = this.Parameters.Threshold;

    bottomData.MapIndexed((i, v) => (v > threshold) ? 1.0d : 0.0d, topData, Zeros.Include);

    return 0;
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    if (propagateDown[0]) {
        var bottomData = bottom[0].Data;
        var bottomDiff = bottom[0].Diff;
        var topDiff = top[0].Diff;
        var slope = this.Parameters.NegativeSlope;

        // dE/dx = dE/dy for x > 0, and slope * dE/dy otherwise.
        bottomData.MapIndexed((i, v) => topDiff[i] * ((v > 0.0d ? 1.0d : 0.0d) + slope * (v <= 0.0d ? 1.0d : 0.0d)), bottomDiff, Zeros.Include);
    }
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    difference = bottom[0].Data - bottom[1].Data;

    // loss = sum((a - b)^2) / (2 * N), which matches the gradient (a - b) / N used in the backward pass.
    double loss = difference.DotProduct(difference) / (bottom[0].Num * 2);

    // If a top blob is provided, write the loss value into it.
    if (top.Count == 1) {
        top[0].Data[0] = loss;
    }
    return loss;
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    for (int i = 0; i < 2; i++) {
        if (propagateDown[i]) {
            // d loss / d bottom[0] = (a - b) / N, and d loss / d bottom[1] = -(a - b) / N.
            double sign = (i == 0) ? 1 : -1;
            double alpha = sign / bottom[i].Num;

            var bottomDiff = bottom[i].Diff;
            difference.Map(v => alpha * v, bottomDiff, Zeros.Include);
        }
    }
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    var bottomDiff = bottom[0].Diff;
    var topDiff = top[0].Diff;

    int count = bottom[0].Count;
    int channels = bottom[0].Channels;
    int height = bottom[0].Height;
    int width = bottom[0].Width;

    Size stride = this.Parameters.Stride;
    Size kernel = this.Parameters.Kernel;

    // Zero the output
    bottomDiff.Map(v => 0, result: bottomDiff);

    // Main loop: for every bottom element, accumulate the gradient from each pooling window
    // that selected it during the forward pass.
    for (int index = 0; index < count; index++) {
        // Find out the local index and offset.
        int w = index % width;
        int h = (index / width) % height;
        int c = (index / width / height) % channels;
        int n = index / width / height / channels;

        int phstart = (h < kernel.Height) ? 0 : (h - kernel.Height) / stride.Height + 1;
        int phend = Math.Min(h / stride.Height + 1, Pooled.Height);
        int pwstart = (w < kernel.Width) ? 0 : (w - kernel.Width) / stride.Width + 1;
        int pwend = Math.Min(w / stride.Width + 1, Pooled.Width);

        int topOffset = (n * channels + c) * Pooled.Height * Pooled.Width;

        double gradient = 0;
        for (int ph = phstart; ph < phend; ++ph) {
            for (int pw = pwstart; pw < pwend; ++pw) {
                if (index == randomIndexes[topOffset + ph * Pooled.Width + pw]) {
                    gradient += topDiff[topOffset + ph * Pooled.Width + pw];
                }
            }
        }
        bottomDiff[index] = gradient;
    }
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    if (propagateDown[0]) {
        var bottomDiff = bottom[0].Diff;
        var topDiff = top[0].Diff;

        if (this.Phase == PhaseType.Train) {
            // Only the units that were kept in the forward pass receive a gradient.
            topDiff.PointwiseMultiply(mask, result: bottomDiff);
        } else {
            topDiff.CopyTo(bottomDiff);
        }
    }
}
protected double ForwardTestCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    int channels = bottom[0].Channels;
    int height = bottom[0].Height;
    int width = bottom[0].Width;

    Size stride = this.Parameters.Stride;
    Size kernel = this.Parameters.Kernel;

    // Main loop: at test time each output is the activation-weighted average of its window,
    // i.e. sum(x^2) / sum(x).
    topData.MapIndexed((index, _) => {
        int pw = index % Pooled.Width;
        int ph = (index / Pooled.Width) % Pooled.Height;
        int c = (index / Pooled.Width / Pooled.Height) % channels;
        int n = index / Pooled.Width / Pooled.Height / channels;

        int hstart = ph * stride.Height;
        int hend = Math.Min(hstart + kernel.Height, height);
        int wstart = pw * stride.Width;
        int wend = Math.Min(wstart + kernel.Width, width);

        int bottomOffset = (n * channels + c) * height * width;

        double cumulativeSum = double.Epsilon;   // avoids division by zero for all-zero windows
        double cumulativeValues = 0;
        for (int h = hstart; h < hend; h++) {
            for (int w = wstart; w < wend; w++) {
                double value = bottomData[bottomOffset + h * width + w];
                cumulativeSum += value;
                cumulativeValues += value * value;
            }
        }
        return cumulativeValues / cumulativeSum;
    }, topData, Zeros.Include);

    return 0;
}
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    int num = bottom[0].Num;
    int dim = bottom[0].Count / num;

    // Implementation based on http://lingpipe-blog.com/2009/03/17/softmax-without-overflow/
    for (int n = 0; n < num; n++) {
        int offset = n * dim;

        // Find the maximum value of the sample; subtracting it keeps Math.Exp from overflowing.
        double scale = double.NegativeInfinity;
        for (int i = 0; i < dim; i++) {
            if (bottomData[offset + i] > scale) {
                scale = bottomData[offset + i];
            }
        }

        // Store the scale value to use when performing the backwards step.
        this.scaleVector[n] = scale;

        double z = 0.0d;
        for (int i = 0; i < dim; i++) {
            double value = Math.Exp(bottomData[offset + i] - scale);
            z += value;

            // Store in the cache to avoid having to calculate this value again.
            cache[i] = value;
        }

        for (int i = 0; i < dim; i++) {
            topData[offset + i] = cache[i] / z;
        }
    }
    return 0;
}
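// Illustrative sketch (standalone): the same overflow-safe softmax on a plain array. Without
// subtracting the maximum, Math.Exp of a large input would overflow to Infinity and every output
// would become NaN.
static double[] StableSoftmax(double[] x) {
    double max = double.NegativeInfinity;
    for (int i = 0; i < x.Length; i++) {
        if (x[i] > max) { max = x[i]; }
    }

    double z = 0.0;
    var result = new double[x.Length];
    for (int i = 0; i < x.Length; i++) {
        result[i] = Math.Exp(x[i] - max);
        z += result[i];
    }
    for (int i = 0; i < x.Length; i++) {
        result[i] /= z;
    }
    return result;
}
// Example: StableSoftmax(new[] { 1000.0, 1001.0, 1002.0 }) returns roughly { 0.09, 0.24, 0.67 } instead of NaNs.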
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    if (Phase == PhaseType.Train) {
        var ratio = this.Parameters.Ratio;
        var scale = 1f / (1f - ratio);
        var bernoulli = new Bernoulli(1 - ratio);

        // Inverted dropout: kept units are scaled by 1 / (1 - ratio) so no rescaling is needed at test time.
        mask = Vector<double>.Build.SameAs(bottomData, () => scale * bernoulli.Sample());
        bottomData.PointwiseMultiply(mask, result: topData);
    } else {
        bottomData.CopyTo(topData);
    }
    return 0;
}
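// Illustrative sketch (standalone): inverted dropout keeps the expected activation unchanged. With drop
// probability p, a unit survives with probability (1 - p) and is scaled by 1 / (1 - p), so
// E[output] = (1 - p) * x / (1 - p) = x, which is why the test-time path can simply copy the input.
static void DropoutExpectationDemo() {
    var random = new Random(42);
    double p = 0.5;
    double x = 2.0;

    double sum = 0;
    int trials = 100000;
    for (int i = 0; i < trials; i++) {
        bool keep = random.NextDouble() >= p;
        sum += keep ? x / (1 - p) : 0.0;
    }
    Console.WriteLine($"mean output = {sum / trials:F3}, input = {x}");   // mean is close to 2.0
}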
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    using (var probabilityCpu = probability.OnCpu()) {
        // The forward pass computes the softmax probability values.
        softmaxLayer.ForwardCpu(bottom, new CpuTensorScopeCollection { probabilityCpu });

        var probabilityData = probabilityCpu.Data;
        var labels = bottom[1].Data;

        int num = bottom[0].Num;
        int dim = bottom[0].Count / num;

        // Average negative log-likelihood of the correct class, clamped away from log(0).
        double loss = 0;
        for (int i = 0; i < num; i++) {
            loss -= Math.Log(Math.Max(probabilityData[i * dim + (int)labels[i]], double.Epsilon));
        }
        loss /= num;

        if (top.Count >= 1) {
            top[0].Data[0] = loss;
        }
        if (top.Count == 2) {
            top[1].Tensor.ShareData(probability);
        }
        return loss;
    }
}
internal virtual void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    throw new NotSupportedException();
}
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    if (propagateDown[0]) {
        var bottomDiff = bottom[0].Diff;
        var topDiff = top[0].Diff;

        var diffScale = this.Parameters.DiffScale;
        var power = this.Parameters.Power;

        if (diffScale == 0 || power == 1) {
            bottomDiff.Map(v => diffScale, bottomDiff, Zeros.Include);
        } else {
            var bottomData = bottom[0].Data;
            var scale = this.Parameters.Scale;
            var shift = this.Parameters.Shift;

            // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
            //               = diff_scale * y / (shift + scale * x)
            if (power == 2) {
                // Special case for y = (shift + scale * x)^2
                //   -> dy/dx = 2 * scale * (shift + scale * x)
                //            = diff_scale * shift + diff_scale * scale * x
                if (shift != 0) {
                    bottomData.Map(v => diffScale * shift + diffScale * scale * v, bottomDiff, Zeros.Include);
                } else {
                    bottomData.Map(v => diffScale * scale * v, bottomDiff, Zeros.Include);
                }
            } else if (shift == 0) {
                // Special case for y = (scale * x)^power
                //   -> dy/dx = scale * power * (scale * x)^(power - 1)
                //            = scale * power * (scale * x)^power * (scale * x)^(-1)
                //            = power * y / x
                var topData = top[0].Data;
                bottomData.MapIndexed((i, v) => power * (topData[i] / v), bottomDiff, Zeros.Include);
            } else {
                bottomData.CopyTo(bottomDiff);
                if (scale != 1) {
                    bottomDiff.Multiply(scale, result: bottomDiff);
                }

                var topData = top[0].Data;
                bottomDiff.MapIndexed((i, v) => topData[i] / (v + shift), bottomDiff, Zeros.Include);

                if (diffScale != 1) {
                    bottomDiff.Multiply(diffScale, result: bottomDiff);
                }
            }
        }

        if (diffScale != 0) {
            topDiff.PointwiseMultiply(bottomDiff, result: bottomDiff);
        }
    }
}
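// Illustrative sketch (standalone): the derivative used above, dy/dx = diffScale * y / (shift + scale * x)
// with y = (shift + scale * x)^power and diffScale = scale * power, checked against a finite difference.
// The parameter values are made up for illustration.
static void CheckPowerGradient() {
    double scale = 2.0, shift = 0.5, power = 3.0;
    double diffScale = scale * power;
    Func<double, double> f = x => Math.Pow(shift + scale * x, power);

    double x0 = 1.2;
    double analytic = diffScale * f(x0) / (shift + scale * x0);
    double numeric = (f(x0 + 1e-6) - f(x0)) / 1e-6;
    Console.WriteLine($"{analytic:F4} vs {numeric:F4}");   // both approximately 50.46
}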
internal override void BackwardCpu(CpuTensorScopeCollection top, IList<bool> propagateDown, CpuTensorScopeCollection bottom) {
    if (!propagateDown[0]) {
        return;
    }

    var bottomData = bottom[0].Data;
    var bottomDiff = bottom[0].Diff;
    var topData = top[0].Data;
    var topDiff = top[0].Diff;

    int count = bottom[0].Count;
    int num = bottom[0].Num;
    int channels = bottom[0].Channels;
    int height = bottom[0].Height;
    int width = bottom[0].Width;

    Size padding = this.Parameters.Padding;
    Size stride = this.Parameters.Stride;
    Size kernel = this.Parameters.Kernel;

    // Zero the output
    bottomDiff.Map(v => 0, result: bottomDiff);

    // Initialize
    Vector<double> inputMask;
    bool useTopMask = top.Count > 1;
    if (useTopMask) {
        inputMask = top[1].Data;
    } else {
        inputMask = this.maxIndexes;
    }

    // Main loop
    for (int index = 0; index < count; index++) {
        int w = index % width;
        int h = (index / width) % height;
        int c = (index / width / height) % channels;
        int n = index / width / height / channels;

        int phstart = (h + padding.Height < kernel.Height) ? 0 : (h + padding.Height - kernel.Height) / stride.Height + 1;
        int phend = Math.Min((h + padding.Height) / stride.Height + 1, Pooled.Height);
        int pwstart = (w + padding.Width < kernel.Width) ? 0 : (w + padding.Width - kernel.Width) / stride.Width + 1;
        int pwend = Math.Min((w + padding.Width) / stride.Width + 1, Pooled.Width);

        int topOffset = (n * channels + c) * Pooled.Height * Pooled.Width;
        double bottomDatum = bottomData[index];

        double gradient = 0;
        for (int ph = phstart; ph < phend; ++ph) {
            for (int pw = pwstart; pw < pwend; ++pw) {
                int topIndex = ph * Pooled.Width + pw;
                if (bottomDatum == topData[topOffset + topIndex]) {
                    gradient += topDiff[topOffset + topIndex];
                }
            }
        }
        bottomDiff[index] = gradient;
    }
}
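// Illustrative sketch (standalone): max pooling routes the gradient only to the input that won the
// forward pass, which is what the bottomData == topData comparison above implements for each window.
static void MaxPoolRoutingDemo() {
    double[] input = { 1.0, 5.0, 3.0, 2.0 };   // one pooling window of four values
    double topDiff = 0.25;                      // gradient arriving at the pooled output

    double maxValue = double.MinValue;
    for (int i = 0; i < input.Length; i++) {
        if (input[i] > maxValue) { maxValue = input[i]; }
    }

    var bottomDiff = new double[input.Length];
    for (int i = 0; i < input.Length; i++) {
        bottomDiff[i] = input[i] == maxValue ? topDiff : 0.0;
    }
    Console.WriteLine(string.Join(", ", bottomDiff));   // 0, 0.25, 0, 0
}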
internal abstract double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top);
internal override double ForwardCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    int num = bottom[0].Num;
    int channels = bottom[0].Channels;
    int height = bottom[0].Height;
    int width = bottom[0].Width;

    Size padding = this.Parameters.Padding;
    Size stride = this.Parameters.Stride;
    Size kernel = this.Parameters.Kernel;

    // Initialize
    Vector<double> outputMask;
    bool useTopMask = top.Count > 1;
    if (useTopMask) {
        outputMask = top[1].Data;
    } else {
        outputMask = this.maxIndexes;
    }

    outputMask.Map(x => -1f, outputMask, Zeros.Include);
    topData.Map(x => double.MinValue, topData, Zeros.Include);

    // The main loop
    int bottomOffset = 0;
    int topOffset = 0;
    for (int n = 0; n < num; n++) {
        for (int c = 0; c < channels; c++) {
            for (int ph = 0; ph < Pooled.Height; ph++) {
                for (int pw = 0; pw < Pooled.Width; pw++) {
                    int hstart = ph * stride.Height - padding.Height;
                    int wstart = pw * stride.Width - padding.Width;
                    int hend = Math.Min(hstart + kernel.Height, height + padding.Height);
                    int wend = Math.Min(wstart + kernel.Width, width + padding.Width);

                    hstart = Math.Max(hstart, 0);
                    wstart = Math.Max(wstart, 0);
                    hend = Math.Min(hend, height);
                    wend = Math.Min(wend, width);

                    int poolIndex = ph * Pooled.Width + pw;
                    for (int h = hstart; h < hend; h++) {
                        for (int w = wstart; w < wend; w++) {
                            int index = h * width + w;
                            if (bottomData[bottomOffset + index] > topData[topOffset + poolIndex]) {
                                topData[topOffset + poolIndex] = bottomData[bottomOffset + index];
                                outputMask[topOffset + poolIndex] = index;
                            }
                        }
                    }
                }
            }
            bottomOffset += bottom[0].Offset(0, 1);
            topOffset += top[0].Offset(0, 1);
        }
    }
    return 0;
}
protected double ForwardTrainCpu(CpuTensorScopeCollection bottom, CpuTensorScopeCollection top) {
    var bottomData = bottom[0].Data;
    var topData = top[0].Data;

    int num = bottom[0].Num;
    int channels = bottom[0].Channels;
    int height = bottom[0].Height;
    int width = bottom[0].Width;

    Size stride = this.Parameters.Stride;
    Size kernel = this.Parameters.Kernel;

    // Main loop
    topData.MapIndexed((index, _) => {
        int pw = index % Pooled.Width;
        int ph = (index / Pooled.Width) % Pooled.Height;
        int c = (index / Pooled.Width / Pooled.Height) % channels;
        int n = index / Pooled.Width / Pooled.Height / channels;

        int hstart = ph * stride.Height;
        int hend = Math.Min(hstart + kernel.Height, height);
        int wstart = pw * stride.Width;
        int wend = Math.Min(wstart + kernel.Width, width);

        int bottomOffset = (n * channels + c) * height * width;

        // First pass: get the sum of the window.
        double cumulativeSum = 0;
        for (int h = hstart; h < hend; h++) {
            for (int w = wstart; w < wend; w++) {
                cumulativeSum += bottomData[bottomOffset + h * width + w];
            }
        }
        double threshold = this.randomIndexes[index] * cumulativeSum;

        // Second pass: pick the value whose cumulative sum crosses the threshold, and store its index.
        cumulativeSum = 0;
        for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
                cumulativeSum += bottomData[bottomOffset + h * width + w];
                if (cumulativeSum >= threshold) {
                    this.randomIndexes[index] = ((n * channels + c) * height + h) * width + w;
                    return bottomData[bottomOffset + h * width + w];
                }
            }
        }

        // Fallback: should not be reached, but keep the last element of the window.
        this.randomIndexes[index] = ((n * channels + c) * height + (hend - 1)) * width + (wend - 1);
        return bottomData[bottomOffset + (hend - 1) * width + (wend - 1)];
    }, topData, Zeros.Include);

    return 0;
}
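// Illustrative sketch (standalone): the two-pass selection above is weighted sampling. An index is drawn
// with probability proportional to its (non-negative) activation by walking the cumulative sum until it
// crosses a uniform threshold.
static int SampleProportional(double[] activations, double uniform01) {
    double total = 0;
    for (int i = 0; i < activations.Length; i++) {
        total += activations[i];
    }
    double threshold = uniform01 * total;

    double cumulative = 0;
    for (int i = 0; i < activations.Length; i++) {
        cumulative += activations[i];
        if (cumulative >= threshold) {
            return i;
        }
    }
    return activations.Length - 1;   // fallback for an all-zero window
}
// Example: SampleProportional(new[] { 1.0, 3.0 }, u) returns 1 for roughly 75% of uniform u in [0, 1).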