/// <summary>
/// Emits a loop that copies <c>cnt</c> 32-bit values from <c>src</c> to <c>dst</c>.
/// Nothing is copied when <c>cnt</c> is zero.
/// </summary>
/// <param name="c">Code context that receives the emitted instructions.</param>
protected override void Compile(CodeContext c)
{
    var dstPtr = c.IntPtr("dst");
    var srcPtr = c.IntPtr("src");
    var count = c.UIntPtr("cnt");

    // Labels for the top of the copy loop and for the end of the function body.
    var copyLoop = c.Label();
    var done = c.Label();

    // Arguments 0, 1 and 2 are dst, src and cnt respectively.
    c.SetArgument(0, dstPtr);
    c.SetArgument(1, srcPtr);
    c.SetArgument(2, count);

    // Allocate all three variables up front so that they are kept
    // in physical registers only.
    c.Allocate(dstPtr);
    c.Allocate(srcPtr);
    c.Allocate(count);

    // A zero count skips the loop entirely.
    c.Test(count, count);
    c.Jz(done);

    c.Bind(copyLoop);

    // Move one dword (4 bytes) from [src] to [dst] through a temporary.
    var dword = c.Int32("tmp");
    c.Mov(dword, Memory.DWord(srcPtr));
    c.Mov(Memory.DWord(dstPtr), dword);

    // Advance both pointers by the size of the element just copied.
    c.Add(srcPtr, 4);
    c.Add(dstPtr, 4);

    // Keep looping while the remaining count is non-zero.
    c.Dec(count);
    c.Jnz(copyLoop);

    c.Bind(done);
}
/// <summary>
/// Emits code that keeps 32 Int32 variables in use at the same time: every variable is
/// zeroed, then on each of 32 loop iterations variable <c>k</c> is increased by <c>k</c>,
/// and finally all variables are stored as consecutive dwords at the address passed
/// as argument 0.
/// </summary>
/// <param name="c">Code context that receives the emitted instructions.</param>
protected override void Compile(CodeContext c)
{
    var regs = new GpVariable[32];

    // Destination pointer, supplied as the first function argument.
    var outPtr = c.IntPtr("a");
    c.SetArgument(0, outPtr);

    // Create the variables "var0" .. "var31".
    for (var index = 0; index < regs.Length; index++)
    {
        regs[index] = c.Int32("var" + index);
    }

    // Clear every variable.
    foreach (var reg in regs)
    {
        c.Xor(reg, reg);
    }

    var counter = c.Int32("v0");
    var loopTop = c.Label();

    // Run the accumulation loop 32 times.
    c.Mov(counter, 32);
    c.Bind(loopTop);

    // Each variable grows by its own index per iteration.
    for (var index = 0; index < regs.Length; index++)
    {
        c.Add(regs[index], index);
    }

    c.Dec(counter);
    c.Jnz(loopTop);

    // Write the results out, one dword (4 bytes) per variable.
    for (var index = 0; index < regs.Length; index++)
    {
        c.Mov(Memory.DWord(outPtr, index * 4), regs[index]);
    }
}
/// <summary>
/// Emits a function that zeroes two variables, explicitly spills both of them, and then
/// runs a 32-iteration loop in which <c>v1</c> is incremented and added to <c>v0</c>.
/// The function returns <c>v0</c> (1 + 2 + ... + 32).
/// </summary>
/// <param name="c">Code context that receives the emitted instructions.</param>
protected override void Compile(CodeContext c)
{
    var total = c.Int32("v0");
    var step = c.Int32("v1");
    var remaining = c.Int32("cnt");

    // Start both accumulators at zero.
    c.Xor(total, total);
    c.Xor(step, step);

    // Spill both variables before the loop uses them again.
    c.Spill(total);
    c.Spill(step);

    var loopTop = c.Label();
    c.Mov(remaining, 32);

    c.Bind(loopTop);

    // step = step + 1; total = total + step.
    c.Inc(step);
    c.Add(total, step);

    // Repeat while the iteration counter is non-zero.
    c.Dec(remaining);
    c.Jnz(loopTop);

    c.Ret(total);
}
/// <summary>
/// Emits an SSE2 routine that combines 4-byte pixels read from <c>src</c> into <c>dst</c>.
/// Arguments 0, 1 and 2 are the destination pointer, the source pointer and the pixel
/// count (<c>i</c>).
/// </summary>
/// <remarks>
/// The emitted code has three phases:
/// a "small" loop that handles one pixel at a time until <c>dst</c> is 16-byte aligned,
/// an aligned loop that handles four pixels (16 bytes) per iteration, and a second pass
/// through the small loop for the remaining 0..3 pixels.
/// NOTE(review): the arithmetic looks like a premultiplied source-over blend
/// (dst * (255 - srcAlpha) / 255 + src) - confirm against the reference implementation.
/// </remarks>
/// <param name="c">Code context that receives the emitted instructions.</param>
protected override void Compile(CodeContext c)
{
    var dst = c.IntPtr("dst");
    var src = c.IntPtr("src");

    var i = c.IntPtr("i");
    var j = c.IntPtr("j");
    var t = c.IntPtr("t");

    var cZero = c.Xmm("cZero");
    var cMul255A = c.Xmm("cMul255A");
    var cMul255M = c.Xmm("cMul255M");

    var x0 = c.Xmm("x0");
    var x1 = c.Xmm("x1");
    var y0 = c.Xmm("y0");
    var a0 = c.Xmm("a0");
    var a1 = c.Xmm("a1");

    var smallLoop = c.Label();
    var smallEnd = c.Label();

    var largeLoop = c.Label();
    var largeEnd = c.Label();

    var data = c.Label();

    c.SetArgument(0, dst);
    c.SetArgument(1, src);
    c.SetArgument(2, i);

    c.Allocate(dst);
    c.Allocate(src);
    c.Allocate(i);

    // How many pixels have to be processed to make the loop aligned:
    // j = ((0 - dst) & 15) >> 2. The two constants are loaded from the
    // data block along the way (offset 0 and offset 16).
    c.Lea(t, Memory.Ptr(data));
    c.Xor(j, j);
    c.Xorps(cZero, cZero);

    c.Sub(j, dst);
    c.Movaps(cMul255A, Memory.Ptr(t, 0));

    c.And(j, 15);
    c.Movaps(cMul255M, Memory.Ptr(t, 16));

    // Shr sets ZF when the result is zero, i.e. dst is already aligned.
    c.Shr(j, 2);
    c.Jz(smallEnd);

    // j = min(i, j).
    c.Cmp(j, i);
    c.Cmovg(j, i);

    // i -= j.
    c.Sub(i, j);

    // Small loop: one pixel (4 bytes) per iteration, j iterations.
    c.Bind(smallLoop);

    // a0 = (~src) >> 8 per word, then broadcast word 1 to the low four words.
    c.Pcmpeqb(a0, a0);
    c.Movd(y0, Memory.Ptr(src));

    c.Pxor(a0, y0);
    c.Movd(x0, Memory.Ptr(dst));

    c.Psrlw(a0, 8);
    c.Punpcklbw(x0, cZero);

    c.Pshuflw(a0, a0, AsmJit.Common.Utils.Shuffle(1, 1, 1, 1));
    c.Punpcklbw(y0, cZero);

    // x0 = high16((x0 * a0 + cMul255A) * cMul255M), then add the source words.
    c.Pmullw(x0, a0);
    c.Paddsw(x0, cMul255A);
    c.Pmulhuw(x0, cMul255M);

    c.Paddw(x0, y0);
    c.Packuswb(x0, x0);

    c.Movd(Memory.Ptr(dst), x0);

    c.Add(dst, 4);
    c.Add(src, 4);

    c.Dec(j);
    c.Jnz(smallLoop);

    // Second section, prepare for an aligned loop.
    c.Bind(smallEnd);

    // Nothing left at all -> leave the function.
    c.Test(i, i);
    c.Mov(j, i);
    c.Jz(c.Exit);

    // j = remaining pixels after the aligned loop (i % 4), i = number of
    // four-pixel groups (i / 4).
    c.And(j, 3);
    c.Shr(i, 2);
    c.Jz(largeEnd);

    // Aligned loop: four pixels (16 bytes) per iteration. dst is accessed with
    // aligned moves, src with an unaligned load.
    c.Bind(largeLoop);

    c.Movups(y0, Memory.Ptr(src));
    c.Pcmpeqb(a0, a0);
    c.Movaps(x0, Memory.Ptr(dst));

    c.Xorps(a0, y0);
    c.Movaps(x1, x0);

    c.Psrlw(a0, 8);
    c.Punpcklbw(x0, cZero);

    c.Movaps(a1, a0);
    c.Punpcklwd(a0, a0);

    c.Punpckhbw(x1, cZero);
    c.Punpckhwd(a1, a1);

    c.Pshufd(a0, a0, AsmJit.Common.Utils.Shuffle(3, 3, 1, 1));
    c.Pshufd(a1, a1, AsmJit.Common.Utils.Shuffle(3, 3, 1, 1));

    c.Pmullw(x0, a0);
    c.Pmullw(x1, a1);

    c.Paddsw(x0, cMul255A);
    c.Paddsw(x1, cMul255A);

    // All eight word lanes of x0 and x1 are scaled by cMul255M here, so the
    // constant has to be uniform across the whole 128-bit register.
    c.Pmulhuw(x0, cMul255M);
    c.Pmulhuw(x1, cMul255M);

    c.Add(src, 16);
    c.Packuswb(x0, x1);

    c.Paddw(x0, y0);
    c.Movaps(Memory.Ptr(dst), x0);

    c.Add(dst, 16);

    c.Dec(i);
    c.Jnz(largeLoop);

    // Handle the 0..3 trailing pixels with the small loop.
    c.Bind(largeEnd);
    c.Test(j, j);
    c.Jnz(smallLoop);

    // Data: two 16-byte constants, aligned to 16 bytes.
    //   offset  0 -> cMul255A: 0x0080 in every 16-bit lane (rounding bias)
    //   offset 16 -> cMul255M: 0x0101 in every 16-bit lane (multiplier)
    // Both halves of the multiplier must be 0x0101...; a 0x0080... half would
    // scale half of the lanes in the aligned loop by the wrong factor.
    // NOTE(review): assumes Data.Of(a, b) packs two 64-bit halves into one
    // 16-byte constant - confirm against the Data API.
    c.Data(data, 16,
        Data.Of(0x0080008000800080, 0x0080008000800080),
        Data.Of(0x0101010101010101, 0x0101010101010101));
}