// Binarizes `input` with Phansalkar's local adaptive threshold.
//
// For every pixel, the mean and standard deviation of the normalized
// intensities (value / byte.MaxValue) are gathered over the square window
// of half-size `radius` centred on the pixel, clipped to the image bounds.
// The pixel threshold is
//     T = mean * (1 + p * exp(-q * mean) + k * (deviation / r - 1))
// and the output pixel is byte.MaxValue when the normalized intensity is
// above T, otherwise 0.
//
// input  : source image; its float data is divided by byte.MaxValue here,
//          so it is presumably stored in the 0..255 range (confirm against callers).
// radius : half-size of the neighbourhood window, in pixels.
// returns: a new Matrix of the same width/height holding only 0 or byte.MaxValue.
//
// NOTE(review): the kernel body processes one image row per `tid`;
// KernelBuilder/GetKernelDimension are defined elsewhere and are assumed to
// invoke it for tid in [0, height) - confirm.
public static Matrix Phansalkar(Matrix input, int radius)
{
    var width = input.Width;
    var height = input.Height;
    var data = (float[])input;
    var result = (float[])new Matrix(width, height);

    // Threshold parameters, kept in single precision so the kernel does not
    // silently fall back to double arithmetic.
    const float p = 2.5f;
    const float q = 10f;
    const float k = 0.15f;
    const float r = 0.4f;

    var dimension = GetKernelDimension(height);

    Gpu.Default.Launch(KernelBuilder(tid =>
    {
        var index = tid * width;

        for (var x = 0; x < width; x++)
        {
            var sum = 0f;
            var sumOfSquares = 0f;
            var count = 0;

            for (var ky = -radius; ky <= radius; ky++)
            {
                var py = tid + ky;
                var pindex = py * width;

                for (var kx = -radius; kx <= radius; kx++)
                {
                    var px = x + kx;

                    // Only pixels that fall inside the image contribute.
                    if (px >= 0 && px < width && py >= 0 && py < height)
                    {
                        var value = data[pindex + px] / byte.MaxValue;
                        sum += value;
                        sumOfSquares += value * value;
                        count++;
                    }
                }
            }

            var mean = sum / count;

            // Population variance E[v^2] - mean^2. Rounding can push it a hair
            // below zero on flat regions, which would make Sqrt return NaN.
            var variance = sumOfSquares / count - mean * mean;
            if (variance < 0f)
            {
                variance = 0f;
            }

            var deviation = DeviceFunction.Sqrt(variance);
            var threshold = mean * (1f + p * DeviceFunction.Exp(-q * mean) + k * (deviation / r - 1f));

            result[index + x] = data[index + x] / byte.MaxValue > threshold ? byte.MaxValue : 0;
        }
    }, height), new LaunchParam(dimension, dimension));

    return new Matrix(width, height, result);
}
// Forward pass of the LSTM over all time steps, using hand-written
// element-wise GPU kernels.
//
// For each step t the method:
//   1. builds the stacked row [1 (bias), x_t, h_(t-1)] into Hin,
//   2. multiplies it by W and stores the raw gate values in IFOA1,
//   3. applies the gate non-linearities, writing the activated gates to
//      IFOA2, the cell state to C, the hidden state to Y, and updating the
//      running states CY/HY in place.
//
// Only a GPU context with T == float is supported; Util.EnsureTrue is
// called with that condition and the message below.
public void ForwardOptimized(Executor executor)
{
    var ctx = executor.Context;

    var weights = executor.GetTensor(W);
    var stackedWidth = (int)weights.Shape[0];

    var input = executor.GetTensor(X);
    var batch = (int)input.Shape[1];
    var steps = (int)input.Shape[0];
    var hidden = HiddenSize;

    var output = executor.GetTensor(Y, Shape.Create(steps, batch, hidden));
    var inputSize = InputSize;
    var one = 1.0.AsScalar<T>();

    // Initial states must already have shape (batch, hidden).
    var initialCell = executor.GetTensor(CX);
    var initialHidden = executor.GetTensor(HX);
    Util.EnsureTrue(initialCell.Shape.SequenceEqual(Shape.Create(batch, hidden)));
    Util.EnsureTrue(initialHidden.Shape.SequenceEqual(Shape.Create(batch, hidden)));

    // The output states start as copies of the initial states and are then
    // overwritten step by step, so they double as the "previous" states.
    var finalCell = executor.GetTensor(CY, Shape.Create(batch, hidden));
    var finalHidden = executor.GetTensor(HY, Shape.Create(batch, hidden));
    ctx.Assign(finalCell, initialCell);
    ctx.Assign(finalHidden, initialHidden);

    var runningCell = finalCell.Reshape(1, batch, hidden);
    var runningHidden = finalHidden.Reshape(1, batch, hidden);

    var stacked = executor.GetTensor(Hin, Shape.Create(steps, batch, stackedWidth));
    var rawGates = executor.GetTensor(IFOA1, Shape.Create(steps, batch, hidden * 4));
    var gates = executor.GetTensor(IFOA2, Shape.Create(steps, batch, hidden * 4));
    var cells = executor.GetTensor(C, Shape.Create(steps, batch, hidden));

    var supported = ctx.Type == ContextType.Gpu && typeof(T) == typeof(float);
    Util.EnsureTrue(supported, "Currently only support gpu and single precision.");
    if (!supported)
    {
        return;
    }

    var stream = ctx.ToGpuContext().Stream;

    var stackedPtr = stacked.Buffer.Ptr.Reinterpret<float>();
    var inputPtr = input.Buffer.Ptr.Reinterpret<float>();
    var runningHiddenPtr = runningHidden.Buffer.Ptr.Reinterpret<float>();
    var runningCellPtr = runningCell.Buffer.Ptr.Reinterpret<float>();
    var rawGatesPtr = rawGates.Buffer.Ptr.Reinterpret<float>();
    var gatesPtr = gates.Buffer.Ptr.Reinterpret<float>();
    var cellsPtr = cells.Buffer.Ptr.Reinterpret<float>();
    var outputPtr = output.Buffer.Ptr.Reinterpret<float>();

    for (var t = 0; t < steps; t++)
    {
        // Stack [bias | x_t | h_(t-1)] for every batch row of this step.
        stream.For(0, batch * stackedWidth, i =>
        {
            var flat = (int)i;
            var row = flat / stackedWidth;
            var col = flat % stackedWidth;
            var target = t * batch * stackedWidth + row * stackedWidth + col;

            if (col >= 1 + inputSize)
            {
                // Trailing columns: previous hidden state.
                var hiddenCol = col - 1 - inputSize;
                stackedPtr[target] = runningHiddenPtr[row * hidden + hiddenCol];
            }
            else if (col >= 1)
            {
                // Middle columns: the step's input features.
                var inputCol = col - 1;
                stackedPtr[target] = inputPtr[t * batch * inputSize + row * inputSize + inputCol];
            }
            else
            {
                // Column 0: constant bias term.
                stackedPtr[target] = 1.0f;
            }
        });

        // Raw (pre-activation) gate values for this step.
        ctx.Assign(rawGates.Slice(t), Dot(stacked.Slice(t).Reshape(batch, stackedWidth), weights));

        // Gate non-linearities plus cell/hidden state update, one thread per
        // (batch row, hidden unit) pair.
        stream.For(0, batch * hidden, i =>
        {
            var flat = (int)i;
            var row = flat / hidden;
            var unit = flat % hidden;

            var stateIndex = row * hidden + unit;              // into (batch, hidden)
            var stepIndex = t * batch * hidden + stateIndex;   // into (steps, batch, hidden)

            // Gates are laid out as four consecutive blocks of `hidden`
            // values per row of (steps, batch, 4 * hidden): i, f, o, a.
            var gateRow = t * batch * 4 * hidden + row * 4 * hidden;
            var inputGateIndex = gateRow + unit;
            var forgetGateIndex = gateRow + hidden + unit;
            var outputGateIndex = gateRow + 2 * hidden + unit;
            var candidateIndex = gateRow + 3 * hidden + unit;

            var previousCell = runningCellPtr[stateIndex];
            var rawInputGate = rawGatesPtr[inputGateIndex];
            var rawForgetGate = rawGatesPtr[forgetGateIndex];
            var rawOutputGate = rawGatesPtr[outputGateIndex];
            var rawCandidate = rawGatesPtr[candidateIndex];

            // Candidate uses tanh, the three gates use the logistic sigmoid.
            var inputGate = 1.0f / (1.0f + DeviceFunction.Exp(-rawInputGate));
            var forgetGate = 1.0f / (1.0f + DeviceFunction.Exp(-rawForgetGate));
            var outputGate = 1.0f / (1.0f + DeviceFunction.Exp(-rawOutputGate));
            var candidate = DeviceFunction.Tanh(rawCandidate);

            // c_t = i_t * a_t + f_t * c_(t-1)
            var cell = inputGate * candidate + forgetGate * previousCell;

            // h_t = o_t * tanh(c_t)
            var hiddenValue = outputGate * DeviceFunction.Tanh(cell);

            gatesPtr[inputGateIndex] = inputGate;
            gatesPtr[forgetGateIndex] = forgetGate;
            gatesPtr[outputGateIndex] = outputGate;
            gatesPtr[candidateIndex] = candidate;

            cellsPtr[stepIndex] = cell;
            outputPtr[stepIndex] = hiddenValue;

            // Carry the new states into the next time step.
            runningHiddenPtr[stateIndex] = hiddenValue;
            runningCellPtr[stateIndex] = cell;
        });
    }
}