/// <summary>
/// Converts <c>InputColorCount</c> colors to Y, Cb and Cr values by summing entries of the
/// precomputed <c>LookupTables</c> arrays and shifting each sum right by 10 bits.
/// </summary>
/// <remarks>
/// Three components (r, g, b) are read per color, despite the "Rgba" in the method name.
/// The converted values are written into a local <c>Result</c> that is neither returned
/// nor stored anywhere else.
/// </remarks>
public unsafe void RgbaToYcbCrScaledIntegerLut()
{
    // Local copy of the source components.
    OnStackInputCache.Byte source = OnStackInputCache.Byte.Create(this.inputSourceRGB);

    // Local destination, with one write cursor per output channel.
    Result output = default(Result);
    float* lumaOut = (float*)&output.Y;
    float* blueOut = (float*)&output.Cb;
    float* redOut = (float*)&output.Cr;

    // (The setup above is repeated verbatim in the sibling conversion methods.)
    for (int index = 0, offset = 0; index < InputColorCount; index++, offset += 3)
    {
        int red = source.Data[offset];
        int green = source.Data[offset + 1];
        int blue = source.Data[offset + 2];

        // TODO: Maybe concatenating all the arrays in LookupTables to a flat one can improve this!
        // Note that Cb2Cr0 is shared: indexed by blue for Cb and by red for Cr.
        *lumaOut++ = (LookupTables.Y0[red] + LookupTables.Y1[green] + LookupTables.Y2[blue]) >> 10;
        *blueOut++ = 128 + ((LookupTables.Cb0[red] - LookupTables.Cb1[green] + LookupTables.Cb2Cr0[blue]) >> 10);
        *redOut++ = 128 + ((LookupTables.Cb2Cr0[red] - LookupTables.Cr1[green] - LookupTables.Cr2[blue]) >> 10);
    }
}
/// <summary>
/// Converts <c>InputColorCount</c> colors to Y, Cb and Cr values using plain
/// single-precision floating point arithmetic, one component at a time.
/// </summary>
/// <remarks>
/// Three components (r, g, b) are read per color, despite the "Rgba" in the method name.
/// The converted values are written into a local <c>Result</c> that is neither returned
/// nor stored anywhere else.
/// </remarks>
public unsafe void RgbaToYcbCrScalarFloat()
{
    // Local copy of the source components.
    OnStackInputCache.Byte source = OnStackInputCache.Byte.Create(this.inputSourceRGB);

    // Local destination, with one write cursor per output channel.
    Result output = default(Result);
    float* lumaOut = (float*)&output.Y;
    float* blueOut = (float*)&output.Cb;
    float* redOut = (float*)&output.Cr;

    // (The setup above is repeated verbatim in the sibling conversion methods.)
    for (int index = 0, offset = 0; index < InputColorCount; index++, offset += 3)
    {
        // Widen each byte component to float before applying the weights.
        float red = source.Data[offset];
        float green = source.Data[offset + 1];
        float blue = source.Data[offset + 2];

        // Cb and Cr are offset by 128; Y is not.
        *lumaOut++ = (0.299F * red) + (0.587F * green) + (0.114F * blue);
        *blueOut++ = 128 + ((-0.168736F * red) - (0.331264F * green) + (0.5F * blue));
        *redOut++ = 128 + ((0.5F * red) - (0.418688F * green) - (0.081312F * blue));
    }
}
/// <summary>
/// Converts <c>InputColorCount</c> colors to Y, Cb and Cr values by multiplying each color,
/// packed into a <see cref="Vector3"/>, with the <c>VectorY</c>, <c>VectorCb</c> and
/// <c>VectorCr</c> weight vectors and then combining the three lanes of every product.
/// </summary>
/// <remarks>
/// Three components (r, g, b) are read per color, despite the "Rgba" in the method name.
/// The converted values are written into a local <c>Result</c> that is neither returned
/// nor stored anywhere else.
/// </remarks>
public unsafe void RgbaToYcbCrSimdFloat()
{
    // Local copy of the source components.
    OnStackInputCache.Byte source = OnStackInputCache.Byte.Create(this.inputSourceRGB);

    // Local destination, with one write cursor per output channel.
    Result output = default(Result);
    float* lumaOut = (float*)&output.Y;
    float* blueOut = (float*)&output.Cb;
    float* redOut = (float*)&output.Cr;

    // (The setup above is repeated verbatim in the sibling conversion methods.)
    for (int index = 0, offset = 0; index < InputColorCount; index++, offset += 3)
    {
        Vector3 rgb = new Vector3(
            source.Data[offset],
            source.Data[offset + 1],
            source.Data[offset + 2]);

        // Lane-wise products: each lane holds one weighted colour component.
        Vector3 lumaTerms = VectorY * rgb;
        Vector3 blueTerms = VectorCb * rgb;
        Vector3 redTerms = VectorCr * rgb;

        // The lanes are combined with explicit signs here; Cb and Cr are offset by 128.
        *lumaOut++ = lumaTerms.X + lumaTerms.Y + lumaTerms.Z;
        *blueOut++ = 128 + (blueTerms.X - blueTerms.Y + blueTerms.Z);
        *redOut++ = 128 + (redTerms.X - redTerms.Y - redTerms.Z);
    }
}
/// <summary>
/// Converts <c>InputColorCount</c> colors to Y, Cb and Cr values by multiplying each color,
/// packed into a <see cref="Vector3"/>, with the <c>VectorY</c>, <c>VectorCb</c> and
/// <c>VectorCr</c> weight vectors and then combining the three lanes of every product.
/// </summary>
/// <remarks>
/// Three components (r, g, b) are read per color, despite the "Rgba" in the method name.
/// The converted values are written into a local <c>Result</c> that is neither returned
/// nor stored anywhere else.
/// </remarks>
public unsafe void RgbaToYcbCrSimdFloat()
{
    // Local copy of the source components.
    OnStackInputCache.Byte source = OnStackInputCache.Byte.Create(this.inputSourceRGB);

    // Local destination, with one write cursor per output channel.
    Result output = default(Result);
    float* lumaOut = (float*)&output.Y;
    float* blueOut = (float*)&output.Cb;
    float* redOut = (float*)&output.Cr;

    // (The setup above is repeated verbatim in the sibling conversion methods.)
    for (int index = 0, offset = 0; index < InputColorCount; index++, offset += 3)
    {
        Vector3 rgb = new Vector3(
            source.Data[offset],
            source.Data[offset + 1],
            source.Data[offset + 2]);

        // Lane-wise products: each lane holds one weighted colour component.
        Vector3 lumaTerms = VectorY * rgb;
        Vector3 blueTerms = VectorCb * rgb;
        Vector3 redTerms = VectorCr * rgb;

        // A fully vectorised combine was tried as well. It should be better in theory,
        // but came out to be worse, so it is kept only as a record of the experiment:
        //
        //   Vector3 c = new Vector3(0, 128, 128);
        //   Vector3 xx = new Vector3(lumaTerms.X, blueTerms.X, redTerms.X);
        //   Vector3 yy = new Vector3(lumaTerms.Y, -blueTerms.Y, -redTerms.Y);
        //   Vector3 zz = new Vector3(lumaTerms.Z, blueTerms.Z, -redTerms.Z);
        //   c += xx + yy + zz;
        //   *lumaOut++ = c.X;
        //   *blueOut++ = c.Y;
        //   *redOut++ = c.Z;

        // The lanes are combined with explicit signs here; Cb and Cr are offset by 128.
        *lumaOut++ = lumaTerms.X + lumaTerms.Y + lumaTerms.Z;
        *blueOut++ = 128 + (blueTerms.X - blueTerms.Y + blueTerms.Z);
        *redOut++ = 128 + (redTerms.X - redTerms.Y - redTerms.Z);
    }
}
/// <summary>
/// Converts <c>InputColorCount</c> colors to Y, Cb and Cr values using integer arithmetic
/// with weights pre-scaled by 1024, undoing the scale with a right shift by 10 bits.
/// </summary>
/// <remarks>
/// Three components (r, g, b) are read per color, despite the "Rgba" in the method name.
/// The converted values are written into a local <c>Result</c> that is neither returned
/// nor stored anywhere else.
/// </remarks>
public unsafe void RgbaToYcbCrScaledInteger()
{
    // Local copy of the source components.
    OnStackInputCache.Byte source = OnStackInputCache.Byte.Create(this.inputSourceRGB);

    // Local destination, with one write cursor per output channel.
    Result output = default(Result);
    float* lumaOut = (float*)&output.Y;
    float* blueOut = (float*)&output.Cb;
    float* redOut = (float*)&output.Cr;

    // (The setup above is repeated verbatim in the sibling conversion methods.)
    for (int index = 0, offset = 0; index < InputColorCount; index++, offset += 3)
    {
        int red = source.Data[offset];
        int green = source.Data[offset + 1];
        int blue = source.Data[offset + 2];

        // Every integer weight below is the floating point factor scaled by 1024,
        // with .5F added and the result truncated:
        //   Y : 306 = 0.299F,      601 = 0.587F,    117 = 0.114F
        //   Cb: -172 = -0.168736F, 339 = 0.331264F, 512 = 0.5F
        //   Cr: 512 = 0.5F,        429 = 0.418688F,  83 = 0.081312F
        *lumaOut++ = ((306 * red) + (601 * green) + (117 * blue)) >> 10;
        *blueOut++ = 128 + (((-172 * red) - (339 * green) + (512 * blue)) >> 10);
        *redOut++ = 128 + (((512 * red) - (429 * green) - (83 * blue)) >> 10);
    }
}