/// <summary>
/// Expands packed 3-byte RGB pixels into 32-bit pixels (R in bits 0-7, G in 8-15,
/// B in 16-23, 0xFF in 24-31), four pixels per 128-bit vector iteration, with a
/// scalar loop for whatever the vector loop does not cover.
/// </summary>
/// <param name="src">Packed source bytes; 3 bytes are read per pixel (count * 3 in total).</param>
/// <param name="dst">Destination; one u32 is written per pixel.</param>
/// <param name="count">Number of pixels to convert.</param>
private static unsafe void V128ForLoop([NoAlias] u8 *src, [NoAlias] u32 *dst, int count)
{
    // Input:
    //
    // 0123 4567 8901 2345
    // RGBR GBRG BRGB RGBR
    // Output:
    // RGBA RGBA RGBA RGBA
    //
    // A selector of -1 makes the byte shuffle emit zero, leaving room to OR in alpha.
    var shuffle = setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1);
    var alpha = set1_epi32(0xFF << 24);

    // Every vector iteration loads 16 bytes but only consumes 12 of them. To keep
    // the load inside the count * 3 source bytes we need 3 * i + 16 <= 3 * count,
    // i.e. i <= count - 6. The pixels left over are converted by the scalar loop.
    int lastVectorStart = count - 6;

    int i = 0;
    for (; i <= lastVectorStart; i += 4)
    {
        var v0 = loadu_ps(src + i * 3);
        var v1 = shuffle_epi8(v0, shuffle);
        var v2 = or_ps(v1, alpha);
        storeu_ps((dst + i), v2);
    }

    // Scalar tail: same result as the vector path, one pixel at a time.
    for (; i < count; i++)
    {
        dst[i] = (u32)src[i * 3 + 0] << 0
               | (u32)src[i * 3 + 1] << 8
               | (u32)src[i * 3 + 2] << 16
               | (u32)0xFF << 24;
    }
}
/// <summary>
/// Scalar conversion of packed 3-byte RGB pixels into 32-bit pixels: the three
/// source bytes land in bits 0-7, 8-15 and 16-23, and bits 24-31 are set to 0xFF.
/// </summary>
/// <param name="src">Packed source bytes; 3 bytes are read per pixel.</param>
/// <param name="dst">Destination; one u32 is written per pixel.</param>
/// <param name="count">Number of pixels to convert.</param>
private static unsafe void BurstForLoop([NoAlias] u8 *src, [NoAlias] u32 *dst, int count)
{
    // NOTE(review): judging by the name, this loop is presumably kept plain so the
    // Burst compiler can optimize it on its own - confirm against the callers.
    u32 opaqueAlpha = (u32)0xFF << 24;

    for (int pixel = 0; pixel < count; pixel++)
    {
        int offset = pixel * 3;

        u32 red = src[offset + 0];
        u32 green = src[offset + 1];
        u32 blue = src[offset + 2];

        dst[pixel] = red | (green << 8) | (blue << 16) | opaqueAlpha;
    }
}
/// <summary>
/// Expands packed 3-byte RGB pixels into 32-bit pixels (R in bits 0-7, G in 8-15,
/// B in 16-23, 0xFF in 24-31), eight pixels per 256-bit vector iteration, with a
/// scalar loop for whatever the vector loop does not cover.
/// </summary>
/// <param name="src">Packed source bytes; 3 bytes are read per pixel (count * 3 in total).</param>
/// <param name="dst">Destination; one u32 is written per pixel.</param>
/// <param name="count">Number of pixels to convert.</param>
private static unsafe void V256ForLoop([NoAlias] u8 *src, [NoAlias] u32 *dst, int count)
{
    // Input
    // u128 0                   1
    // u64  0         1         2         3
    // u32  0    1    2    3    4    5    6    7
    // u8   0123 4567 8901 2345 6789 0123 4567 8901
    //      RGBR GBRG BRGB RGBR GBRG BRGB ---- ----
    // Output
    //      RGBA RGBA RGBA RGBA RGBA RGBA RGBA RGBA
    // Path
    // v0 = 0123 4567 89AB CDEF GHIJ KLMN ---- ----
    // v1 = 0123 4567 89AB ---- CDEF GHIJ KLMN ----
    // v2 = 012- 345- 678- 9AB- CDE- FGH- IJK- LMN-
    // v3 = 012α 345α 678α 9ABα CDEα FGHα IJKα LMNα

    // Moves 12 source bytes into each 128-bit half. The index in slots 3 and 7 is
    // a don't-care: the byte shuffle below never references bytes 12-15 of a half.
    var permute = mm256_setr_epi32(0, 1, 2, 0xFF, 3, 4, 5, 0xFF);

    // Same per-half byte shuffle as the 128-bit version, duplicated for both halves;
    // a selector of -1 emits a zero byte, leaving room to OR in alpha.
    var shuffleV128 = setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1);
    var shuffleV256 = mm256_setr_m128(shuffleV128, shuffleV128);
    var alpha = mm256_set1_epi32(0xFF << 24);

    // Every vector iteration loads 32 bytes but only consumes 24 of them. To keep
    // the load inside the count * 3 source bytes we need 3 * i + 32 <= 3 * count,
    // i.e. i <= count - 11. The pixels left over are converted by the scalar loop.
    int lastVectorStart = count - 11;

    int i = 0;
    for (; i <= lastVectorStart; i += 8)
    {
        var v0 = mm256_loadu_ps(src + i * 3);
        var v1 = mm256_permutevar8x32_epi32(v0, permute);
        var v2 = mm256_shuffle_epi8(v1, shuffleV256);
        var v3 = mm256_or_ps(v2, alpha);
        mm256_storeu_ps(dst + i, v3);
    }

    // Scalar tail: same result as the vector path, one pixel at a time.
    for (; i < count; i++)
    {
        dst[i] = (u32)src[i * 3 + 0] << 0
               | (u32)src[i * 3 + 1] << 8
               | (u32)src[i * 3 + 2] << 16
               | (u32)0xFF << 24;
    }
}
//-------------------------------------------------
//  char_expand - expand the raw data for a
//  character into a bitmap
//
//  chnum - character being expanded; logged, and used to detect command
//          glyphs (COMMAND_UNICODE <= chnum < COMMAND_UNICODE + MAX_GLYPH_FONT)
//  gl    - glyph whose rawdata is decoded; its bitmap is allocated and
//          filled here and a texture is wrapped around it
//
//  Command glyphs and OSD-format fonts are not implemented and throw
//  emu_unimplemented. A glyph with a zero dimension or no raw data is
//  logged and left untouched. Otherwise each source row is decoded into
//  bitmap row (y + m_height + m_yoffs - gl.yoffs - gl.bmheight); rows that
//  land outside [0, m_height) are clipped, but their source data is still
//  consumed so later rows stay aligned with the input.
//
//  TEXT   - one text line per row; each hex digit yields four pixels,
//           most significant bit first; non-hex characters are skipped.
//           If a line ends before a digit is found, bits stays -1 and
//           the remaining pixels of that group take the foreground color.
//  CACHED - bit-packed, most significant bit first; the bit accumulator
//           carries over from one row to the next.
//-------------------------------------------------
void char_expand(char32_t chnum, glyph gl)
{
    LOG("render_font::char_expand: expanding character {0}\n", chnum);

    // set bits use the glyph color (or the first constant below when it is 0),
    // clear bits use the second constant
    rgb_t fgcol = (gl.color != 0 ? gl.color : new rgb_t(0xff, 0xff, 0xff, 0xff));
    rgb_t bgcol = new rgb_t(0x00, 0xff, 0xff, 0xff);
    bool is_cmd = ((chnum >= COMMAND_UNICODE) && (chnum < COMMAND_UNICODE + MAX_GLYPH_FONT));

    if (is_cmd)
    {
        throw new emu_unimplemented();
#if false
        // punt if nothing there
        if (gl.bmwidth == 0 || gl.bmheight == 0 || gl.rawdata == null)
        {
            return;
        }

        // allocate a new bitmap of the size we need
        gl.bitmap.allocate(gl.bmwidth, m_height_cmd);
        gl.bitmap.fill(0);

        // extract the data
        const char *ptr = gl.rawdata;
        byte accum = 0;
        byte accumbit = 7;
        for (int y = 0; y < gl.bmheight; y++)
        {
            int desty = y + m_height_cmd + m_yoffs_cmd - gl.yoffs - gl.bmheight;
            u32 *dest = (desty >= 0 && desty < m_height_cmd) ? &gl.bitmap.pix(desty, 0) : nullptr;
            {
                for (int x = 0; x < gl.bmwidth; x++)
                {
                    if (accumbit == 7)
                    {
                        accum = *ptr++;
                    }

                    if (dest != null)
                    {
                        *dest++ = (accum & (1 << accumbit)) ? color : rgb_t(0x00, 0xff, 0xff, 0xff);
                    }

                    accumbit = (accumbit - 1) & 7;
                }
            }
        }
#endif
    }
    else if (m_format == format.OSD)
    {
        throw new emu_unimplemented();
#if false
        // if we're an OSD font, query the info
#endif
    }
    else if (gl.bmwidth == 0 || gl.bmheight == 0 || gl.rawdata == null)
    {
        // abort if nothing there
        LOG("render_font::char_expand: empty bitmap bounds or no raw data\n");
        return;
    }
    else
    {
        // other formats need to parse their data
        LOG("render_font::char_expand: building bitmap from raw data\n");

        // allocate a new bitmap of the size we need
        gl.bitmap.allocate(gl.bmwidth, m_height);
        gl.bitmap.fill(0);

        // extract the data
        Pointer<u8> ptr = new Pointer<u8>(gl.rawdata);  //const char *ptr = gl.rawdata;
        u8 accum = 0;
        u8 accumbit = 7;
        for (int y = 0; y < gl.bmheight; y++)
        {
            // destination row; null when the row is clipped
            int desty = y + m_height + m_yoffs - gl.yoffs - gl.bmheight;
            PointerU32 dest = ((0 <= desty) && (m_height > desty)) ? gl.bitmap.pix(desty) : null;  //u32 *dest(((0 <= desty) && (m_height > desty)) ? &gl.bitmap.pix(desty) : nullptr);

            if (m_format == format.TEXT)
            {
                if (dest != null)
                {
                    for (int x = 0; gl.bmwidth > x; )
                    {
                        // scan for the next hex digit, stopping at the end of the line
                        int bits = -1;
                        while ('\r' != ptr[0] && '\n' != ptr[0] && 0 > bits)  //while (('\r' != *ptr) && ('\n' != *ptr) && (0 > bits))
                        {
                            if (ptr[0] >= '0' && ptr[0] <= '9')
                            {
                                bits = ptr[0] - '0';  //bits = *ptr++ - '0';
                                ptr++;
                            }
                            else if (ptr[0] >= 'A' && ptr[0] <= 'F')
                            {
                                bits = ptr[0] - 'A' + 10;
                                ptr++;
                            }
                            else if (ptr[0] >= 'a' && ptr[0] <= 'f')
                            {
                                bits = ptr[0] - 'a' + 10;
                                ptr++;
                            }
                            else
                            {
                                ptr++;
                            }
                        }

                        // expand the four bits, never writing past bmwidth pixels
                        dest[0] = (bits & 8) != 0 ? fgcol : bgcol;  //*dest++ = (bits & 8) ? fgcol : bgcol;
                        dest++;
                        if (gl.bmwidth > ++x)
                        {
                            dest[0] = (bits & 4) != 0 ? fgcol : bgcol;  //*dest++ = (bits & 4) ? fgcol : bgcol;
                            dest++;
                        }
                        if (gl.bmwidth > ++x)
                        {
                            dest[0] = (bits & 2) != 0 ? fgcol : bgcol;  //*dest++ = (bits & 2) ? fgcol : bgcol;
                            dest++;
                        }
                        if (gl.bmwidth > ++x)
                        {
                            dest[0] = (bits & 1) != 0 ? fgcol : bgcol;  //*dest++ = (bits & 1) ? fgcol : bgcol;
                            dest++;
                        }
                        x++;
                    }
                }

                // advance to the next line; this must also happen for clipped rows,
                // otherwise every following row would be decoded from the wrong line
                ptr = next_line(ptr);
            }
            else if (m_format == format.CACHED)
            {
                for (int x = 0; x < gl.bmwidth; x++)
                {
                    // fetch a new byte whenever the previous one is used up
                    if (accumbit == 7)
                    {
                        accum = ptr[0];
                        ptr++;
                    }

                    if (dest != null)
                    {
                        dest[0] = (accum & (1 << accumbit)) != 0 ? fgcol : bgcol;  //*dest++ = (accum & (1 << accumbit)) ? fgcol : bgcol;
                        dest++;
                    }

                    accumbit = (u8)((accumbit - 1) & 7);
                }
            }
        }
    }

    // wrap a texture around the bitmap
    gl.texture = m_manager.texture_alloc(render_texture.hq_scale);
    gl.texture.set_bitmap(gl.bitmap, gl.bitmap.cliprect(), texture_format.TEXFORMAT_ARGB32);
}