/// <summary>
/// Builds the four-entry palette for a pair of 16-bit endpoint colors.
/// Entries 0 and 1 are the expanded endpoints; entries 2 and 3 are the two
/// blends produced by stb__Lerp13RGB (same pair, opposite argument order).
/// </summary>
/// <param name="color">Destination palette; must have room for four entries.</param>
/// <param name="c0">First endpoint as a 16-bit color.</param>
/// <param name="c1">Second endpoint as a 16-bit color.</param>
static void stb__EvalColors(ARGB_Rev *color, ushort c0, ushort c1)
{
    ARGB_Rev *endpoint0 = color;
    ARGB_Rev *endpoint1 = color + 1;
    ARGB_Rev *blend01 = color + 2;
    ARGB_Rev *blend10 = color + 3;

    // The endpoints must be expanded first: both blends are derived from them.
    stb__From16Bit(out *endpoint0, c0);
    stb__From16Bit(out *endpoint1, c1);

    stb__Lerp13RGB(ref *blend01, *endpoint0, *endpoint1);
    stb__Lerp13RGB(ref *blend10, *endpoint1, *endpoint0);
}
/// <summary>
/// Compresses one block of 16 pixels into <paramref name="dest"/>.
/// When <paramref name="alpha"/> is set, an 8-byte alpha block is written first
/// and the 8-byte color block follows it; otherwise only the color block is written.
/// Each written 8-byte block is passed through scramble().
/// </summary>
/// <param name="dest">Output buffer (8 bytes, or 16 bytes when alpha is requested).</param>
/// <param name="src">The 16 source pixels of the block.</param>
/// <param name="alpha">Whether to emit an alpha block ahead of the color block.</param>
/// <param name="mode">Compression flags forwarded to the block compressors.</param>
static private void stb_compress_dxt_block(byte *dest, ARGB_Rev *src, bool alpha, CompressionMode mode)
{
    // One-time setup, guarded by the shared 'init' flag.
    if (init)
    {
        stb__InitDXT();
        init = false;
    }

    byte *colorDest = dest;

    if (alpha)
    {
        stb__CompressAlphaBlock(dest, src, mode);
        scramble(dest);

        // The color block is stored right after the 8-byte alpha block.
        colorDest = dest + 8;
    }

    stb__CompressColorBlock(colorDest, src, mode);
    scramble(colorDest);
}
/// <summary>
/// Block dithering function. Dithers the R, G and B channels of a 16-pixel block
/// through the quantization tables using Floyd-Steinberg error diffusion.
/// Only R, G and B of <paramref name="dest"/> are written.
/// </summary>
/// <param name="dest">Receives the dithered block (16 pixels).</param>
/// <param name="block">Source block (16 pixels).</param>
static void stb__DitherBlock(ARGB_Rev *dest, ARGB_Rev *block)
{
    // Two rows of four error terms: ep1 is the row being produced, ep2 the previous row.
    var err = stackalloc int[8];
    int *ep1 = err;
    int *ep2 = err + 4;
    int *et;
    int ch, y;

    fixed(byte *_stb__QuantGTab = stb__QuantGTab)
    fixed(byte *_stb__QuantRBTab = stb__QuantRBTab)
    {
        // process channels separately
        for (ch = 0; ch < 3; ++ch)
        {
            // Select the channel directly instead of allocating managed
            // pointer arrays on every call.
            byte *bp;
            byte *dp;
            switch (ch)
            {
            case 0:
                bp = &block->R;
                dp = &dest->R;
                break;

            case 1:
                bp = &block->G;
                dp = &dest->G;
                break;

            default:
                bp = &block->B;
                dp = &dest->B;
                break;
            }

            // Tables are addressed with an offset of 8 so that the diffused error
            // can push the lookup index slightly out of the 0..255 range.
            byte *quant = (ch == 1) ? _stb__QuantGTab + 8 : _stb__QuantRBTab + 8;

            PointerUtils.Memset((byte *)err, 0, sizeof(int) * 8);

            for (y = 0; y < 4; ++y)
            {
                // Byte offsets 0/4/8/12 are the four pixels of the current row
                // (4 bytes per pixel); 16 bytes advance to the next row.
                dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)];
                ep1[0] = bp[0] - dp[0];

                dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)];
                ep1[1] = bp[4] - dp[4];

                dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)];
                ep1[2] = bp[8] - dp[8];

                dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)];
                ep1[3] = bp[12] - dp[12];

                bp += 16;
                dp += 16;

                // swap the error rows
                et = ep1;
                ep1 = ep2;
                ep2 = et;
            }
        }
    }
}
/// <summary>
/// Alpha block compression. Writes 8 bytes: the maximum alpha, the minimum alpha,
/// then sixteen 3-bit indices packed least-significant-bit first.
/// </summary>
/// <param name="dest">Output buffer; 8 bytes are written.</param>
/// <param name="src">The 16 source pixels; only the A channel is read.</param>
/// <param name="mode">Compression flags (not used by this function).</param>
static void stb__CompressAlphaBlock(byte *dest, ARGB_Rev *src, CompressionMode mode)
{
    // Find the alpha range of the block.
    int lowest = src[0].A;
    int highest = lowest;
    for (int p = 1; p < 16; p++)
    {
        int alpha = src[p].A;
        if (alpha < lowest)
        {
            lowest = alpha;
        }
        else if (alpha > highest)
        {
            highest = alpha;
        }
    }

    // Endpoints: maximum first, minimum second.
    *dest++ = (byte)highest;
    *dest++ = (byte)lowest;

    // determine bias and emit color indices
    // given the choice of mx/mn, these indices are optimal:
    // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
    int range = highest - lowest;
    int range4 = range * 4;
    int range2 = range * 2;

    int bias;
    if (range < 8)
    {
        bias = range - 1;
    }
    else
    {
        bias = range / 2 + 2;
    }
    bias -= lowest * 7;

    int pendingBits = 0;
    int bitCount = 0;

    for (int p = 0; p < 16; p++)
    {
        int scaled = src[p].A * 7 + bias;

        // Select the "linear scale" lerp factor between 0 (val=min) and 7 (val=max)
        // by successive subtraction of 4x, 2x and 1x the range.
        int linear = 0;
        if (scaled >= range4)
        {
            linear = 4;
            scaled -= range4;
        }
        if (scaled >= range2)
        {
            linear += 2;
            scaled -= range2;
        }
        if (scaled >= range)
        {
            linear += 1;
        }

        // turn linear scale into DXT index (0/1 are extremal pts)
        int index = -linear & 7;
        if (index < 2)
        {
            index ^= 1;
        }

        // Append the 3-bit index and flush every completed byte.
        pendingBits |= index << bitCount;
        bitCount += 3;
        if (bitCount >= 8)
        {
            *dest++ = (byte)pendingBits;
            pendingBits >>= 8;
            bitCount -= 8;
        }
    }
}
/// <summary>
/// Color block compression. Writes 8 bytes: two 16-bit endpoint colors
/// (larger value first, each low byte first) followed by a 32-bit index mask
/// holding one 2-bit index per pixel.
/// </summary>
/// <param name="dest">Output buffer; 8 bytes are written.</param>
/// <param name="block">The 16 source pixels.</param>
/// <param name="mode">Compression flags; Dither and HighQuality are honoured.</param>
static void stb__CompressColorBlock(byte *dest, ARGB_Rev *block, CompressionMode mode)
{
    uint mask;
    ushort max16, min16;
    var dblock = stackalloc ARGB_Rev[16];
    var color = stackalloc ARGB_Rev[4];

    bool dither = (mode & CompressionMode.Dither) != 0;
    int refinecount = ((mode & CompressionMode.HighQuality) != 0) ? 2 : 1;

    // check if block is constant (whole 32-bit pixels are compared)
    uint *packed = (uint *)block;
    int firstDifferent = 1;
    while (firstDifferent < 16 && packed[firstDifferent] == packed[0])
    {
        firstDifferent++;
    }

    if (firstDifferent == 16)
    {
        // constant color: use the precomputed single-color tables; every index is 2
        int r = block[0].R, g = block[0].G, b = block[0].B;
        mask = 0xaaaaaaaa;
        max16 = (ushort)((stb__OMatch5[r, 0] << 11) | (stb__OMatch6[g, 0] << 5) | stb__OMatch5[b, 0]);
        min16 = (ushort)((stb__OMatch5[r, 1] << 11) | (stb__OMatch6[g, 1] << 5) | stb__OMatch5[b, 1]);
    }
    else
    {
        // first step: compute dithered version for PCA if desired
        ARGB_Rev *fitSource = block;
        if (dither)
        {
            stb__DitherBlock(dblock, block);
            fitSource = dblock;
        }

        // second step: pca+map along principal axis
        stb__OptimizeColorsBlock(fitSource, &max16, &min16);
        if (max16 == min16)
        {
            mask = 0;
        }
        else
        {
            stb__EvalColors(color, max16, min16);
            mask = stb__MatchColorsBlock(block, color, dither);
        }

        // third step: refine (multiple times if requested)
        for (int pass = 0; pass < refinecount; pass++)
        {
            uint lastmask = mask;

            if (stb__RefineBlock(fitSource, &max16, &min16, mask))
            {
                if (max16 == min16)
                {
                    mask = 0;
                    break;
                }

                stb__EvalColors(color, max16, min16);
                mask = stb__MatchColorsBlock(block, color, dither);
            }

            // Stop as soon as refinement no longer changes the indices.
            if (mask == lastmask)
            {
                break;
            }
        }
    }

    // write the color block, larger endpoint first; swapping the endpoints
    // requires flipping the low bit of every 2-bit index
    if (max16 < min16)
    {
        ushort swapped = min16;
        min16 = max16;
        max16 = swapped;
        mask ^= 0x55555555;
    }

    dest[0] = (byte)(max16);
    dest[1] = (byte)(max16 >> 8);
    dest[2] = (byte)(min16);
    dest[3] = (byte)(min16 >> 8);
    dest[4] = (byte)(mask);
    dest[5] = (byte)(mask >> 8);
    dest[6] = (byte)(mask >> 16);
    dest[7] = (byte)(mask >> 24);
}
/// <summary>
/// The refinement function. (Clever code, part 2)
/// Re-derives both endpoint colors from the block contents and the current
/// index assignment by solving a least squares system
/// (normal equations + Cramer's rule).
/// </summary>
/// <param name="block">The 16 pixels the endpoints are fitted to.</param>
/// <param name="pmax16">In: current first endpoint. Out: refined first endpoint.</param>
/// <param name="pmin16">In: current second endpoint. Out: refined second endpoint.</param>
/// <param name="mask">Current index mask, one 2-bit index per pixel.</param>
/// <returns>true when at least one endpoint changed.</returns>
static bool stb__RefineBlock(ARGB_Rev *block, ushort *pmax16, ushort *pmin16, uint mask)
{
    // Weight of the first endpoint (in thirds) for each 2-bit index.
    var weightOfFirst = new int[4] { 3, 0, 2, 1 };

    // Precomputed products of the weights for the least squares system, packed so
    // that one addition accumulates all three sums inside one 32-bit register:
    // bits 16 and up hold xx, bits 8..15 hold yy, bits 0..7 hold xy.
    var packedProducts = new int[4] { 0x090000, 0x000900, 0x040102, 0x010402 };

    ushort previousMin = *pmin16;
    ushort previousMax = *pmax16;
    ushort min16, max16;

    if ((mask ^ (mask << 2)) < 4) // all pixels have the same index?
    {
        // yes, linear system would be singular; solve using optimal
        // single-color match on average color
        int r = 8, g = 8, b = 8; // start at 8 so the shift by 4 rounds to nearest

        for (int p = 0; p < 16; ++p)
        {
            r += block[p].R;
            g += block[p].G;
            b += block[p].B;
        }

        r >>= 4;
        g >>= 4;
        b >>= 4;

        max16 = (ushort)((stb__OMatch5[r, 0] << 11) | (stb__OMatch6[g, 0] << 5) | stb__OMatch5[b, 0]);
        min16 = (ushort)((stb__OMatch5[r, 1] << 11) | (stb__OMatch6[g, 1] << 5) | stb__OMatch5[b, 1]);
    }
    else
    {
        int akku = 0;
        int At1_r = 0, At1_g = 0, At1_b = 0;
        int At2_r = 0, At2_g = 0, At2_b = 0;
        uint remaining = mask;

        for (int p = 0; p < 16; ++p)
        {
            int step = (int)(remaining & 3);
            int w1 = weightOfFirst[step];
            int r = block[p].R;
            int g = block[p].G;
            int b = block[p].B;

            akku += packedProducts[step];

            At1_r += w1 * r;
            At1_g += w1 * g;
            At1_b += w1 * b;

            At2_r += r;
            At2_g += g;
            At2_b += b;

            remaining >>= 2;
        }

        // The two weights of a pixel always add up to 3, so the second
        // right-hand side follows from the plain sums.
        At2_r = 3 * At2_r - At1_r;
        At2_g = 3 * At2_g - At1_g;
        At2_b = 3 * At2_b - At1_b;

        // extract solutions and decide solvability
        int xx = akku >> 16;
        int yy = (akku >> 8) & 0xff;
        int xy = (akku >> 0) & 0xff;

        float frb = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
        float fg = frb * 63.0f / 31.0f;

        // solve.
        max16 = (ushort)(stb__sclamp((At1_r * yy - At2_r * xy) * frb + 0.5f, 0, 31) << 11);
        max16 |= (ushort)(stb__sclamp((At1_g * yy - At2_g * xy) * fg + 0.5f, 0, 63) << 5);
        max16 |= (ushort)(stb__sclamp((At1_b * yy - At2_b * xy) * frb + 0.5f, 0, 31) << 0);

        min16 = (ushort)(stb__sclamp((At2_r * xx - At1_r * xy) * frb + 0.5f, 0, 31) << 11);
        min16 |= (ushort)(stb__sclamp((At2_g * xx - At1_g * xy) * fg + 0.5f, 0, 63) << 5);
        min16 |= (ushort)(stb__sclamp((At2_b * xx - At1_b * xy) * frb + 0.5f, 0, 31) << 0);
    }

    *pmin16 = min16;
    *pmax16 = max16;

    return((previousMin != min16) || (previousMax != max16));
}
/// <summary>
/// The color optimization function. (Clever code, part 1)
/// Estimates the principal axis of the block's colors (covariance matrix plus a
/// few power iterations) and picks the two pixels that lie furthest apart along
/// that axis as the initial endpoints.
/// </summary>
/// <param name="block">The 16 pixels to analyse; only R, G and B are read.</param>
/// <param name="pmax16">Receives the 16-bit color of the pixel with the largest projection.</param>
/// <param name="pmin16">Receives the 16-bit color of the pixel with the smallest projection.</param>
static void stb__OptimizeColorsBlock(ARGB_Rev *block, ushort *pmax16, ushort *pmin16)
{
    const int nIterPower = 4;

    int mind = 0x7fffffff, maxd = -0x7fffffff;
    var minp = default(ARGB_Rev);
    var maxp = default(ARGB_Rev);
    double magn;
    int v_r, v_g, v_b;
    float vfr, vfg, vfb;
    float *covf = stackalloc float[6];
    var cov = stackalloc int[6];
    int i, iter;

    // determine color distribution
    // The statistics are gathered through the named channel fields so that they
    // always pair up with the .R/.G/.B reads below, whatever the in-memory
    // byte order of ARGB_Rev is.
    int sumR = block[0].R, minR = sumR, maxR = sumR;
    int sumG = block[0].G, minG = sumG, maxG = sumG;
    int sumB = block[0].B, minB = sumB, maxB = sumB;

    for (i = 1; i < 16; i++)
    {
        int r = block[i].R;
        int g = block[i].G;
        int b = block[i].B;

        sumR += r;
        if (r < minR)
        {
            minR = r;
        }
        else if (r > maxR)
        {
            maxR = r;
        }

        sumG += g;
        if (g < minG)
        {
            minG = g;
        }
        else if (g > maxG)
        {
            maxG = g;
        }

        sumB += b;
        if (b < minB)
        {
            minB = b;
        }
        else if (b > maxB)
        {
            maxB = b;
        }
    }

    // Rounded mean over the 16 pixels.
    int muR = (sumR + 8) >> 4;
    int muG = (sumG + 8) >> 4;
    int muB = (sumB + 8) >> 4;

    // determine covariance matrix (upper triangle: rr, rg, rb, gg, gb, bb)
    for (i = 0; i < 6; i++)
    {
        cov[i] = 0;
    }

    for (i = 0; i < 16; i++)
    {
        int r = block[i].R - muR;
        int g = block[i].G - muG;
        int b = block[i].B - muB;

        cov[0] += r * r;
        cov[1] += r * g;
        cov[2] += r * b;
        cov[3] += g * g;
        cov[4] += g * b;
        cov[5] += b * b;
    }

    // convert covariance matrix to float, find principal axis via power iter
    for (i = 0; i < 6; i++)
    {
        covf[i] = cov[i] / 255.0f;
    }

    // Start the iteration from the per-channel value range.
    vfr = (float)(maxR - minR);
    vfg = (float)(maxG - minG);
    vfb = (float)(maxB - minB);

    for (iter = 0; iter < nIterPower; iter++)
    {
        float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
        float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
        float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];

        vfr = r;
        vfg = g;
        vfb = b;
    }

    magn = Math.Abs(vfr);
    if (Math.Abs(vfg) > magn)
    {
        magn = Math.Abs(vfg);
    }
    if (Math.Abs(vfb) > magn)
    {
        magn = Math.Abs(vfb);
    }

    if (magn < 4.0f)
    {
        // too small, default to luminance
        v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
        v_g = 587;
        v_b = 114;
    }
    else
    {
        // Scale so that the largest component becomes +/-512.
        magn = 512.0 / magn;
        v_r = (int)(vfr * magn);
        v_g = (int)(vfg * magn);
        v_b = (int)(vfb * magn);
    }

    // Pick colors at extreme points
    for (i = 0; i < 16; i++)
    {
        int dot = block[i].R * v_r + block[i].G * v_g + block[i].B * v_b;

        if (dot < mind)
        {
            mind = dot;
            minp = block[i];
        }

        if (dot > maxd)
        {
            maxd = dot;
            maxp = block[i];
        }
    }

    *pmax16 = stb__As16Bit(maxp.R, maxp.G, maxp.B);
    *pmin16 = stb__As16Bit(minp.R, minp.G, minp.B);
}
/// <summary>
/// The color matching function. Assigns each of the 16 pixels the index of one
/// of the four palette colors and returns the indices packed two bits per pixel
/// (pixel 0 in the lowest bits).
/// </summary>
/// <param name="block">The 16 pixels to match.</param>
/// <param name="color">The four palette colors.</param>
/// <param name="dither">Whether to diffuse the matching error (Floyd-Steinberg) between pixels.</param>
/// <returns>The 32-bit index mask.</returns>
static uint stb__MatchColorsBlock(ARGB_Rev *block, ARGB_Rev *color, bool dither)
{
    uint mask = 0;

    // Direction of the line running from palette entry 1 to palette entry 0.
    int dirr = color[0].R - color[1].R;
    int dirg = color[0].G - color[1].G;
    int dirb = color[0].B - color[1].B;

    var dots = stackalloc int[16];
    var stops = stackalloc int[4];

    for (int p = 0; p < 16; p++)
    {
        dots[p] = block[p].R * dirr + block[p].G * dirg + block[p].B * dirb;
    }

    for (int c = 0; c < 4; c++)
    {
        stops[c] = color[c].R * dirr + color[c].G * dirg + color[c].B * dirb;
    }

    // think of the colors as arranged on a line; project point onto that line, then choose
    // next color out of available ones. we compute the crossover points for "best color in top
    // half"/"best in bottom half" and then the same inside that subinterval.
    //
    // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
    // but it's very close and a lot faster.
    // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
    int c0Point = (stops[1] + stops[3]) >> 1;
    int halfPoint = (stops[3] + stops[2]) >> 1;
    int c3Point = (stops[2] + stops[0]) >> 1;

    if (!dither)
    {
        // the version without dithering is straightforward:
        // walk backwards so that pixel 0 ends up in the lowest bits
        for (int p = 15; p >= 0; p--)
        {
            mask <<= 2;
            mask |= (uint)stb__SelectStep(dots[p], c0Point, halfPoint, c3Point);
        }

        return(mask);
    }

    // with floyd-steinberg dithering
    var err = stackalloc int[8];
    int *ep1 = err;     // error row being produced
    int *ep2 = err + 4; // error row of the previous pixel row
    int *dp = dots;

    // The diffused error carries four fractional bits, so scale the thresholds to match.
    c0Point <<= 4;
    halfPoint <<= 4;
    c3Point <<= 4;

    for (int e = 0; e < 8; e++)
    {
        err[e] = 0;
    }

    for (int y = 0; y < 4; y++)
    {
        int dot, lmask, step;

        dot = (dp[0] << 4) + (3 * ep2[1] + 5 * ep2[0]);
        step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
        ep1[0] = dp[0] - stops[step];
        lmask = step;

        dot = (dp[1] << 4) + (7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]);
        step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
        ep1[1] = dp[1] - stops[step];
        lmask |= step << 2;

        dot = (dp[2] << 4) + (7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]);
        step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
        ep1[2] = dp[2] - stops[step];
        lmask |= step << 4;

        dot = (dp[3] << 4) + (7 * ep1[2] + 5 * ep2[3] + ep2[2]);
        step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
        ep1[3] = dp[3] - stops[step];
        lmask |= step << 6;

        dp += 4;
        mask |= (uint)(lmask << (y * 8));

        // swap the error rows
        int *et = ep1;
        ep1 = ep2;
        ep2 = et;
    }

    return(mask);
}

/// <summary>
/// Picks the palette index (0..3) for a projected value using the three crossover points.
/// </summary>
static int stb__SelectStep(int dot, int c0Point, int halfPoint, int c3Point)
{
    if (dot < halfPoint)
    {
        return (dot < c0Point) ? 1 : 3;
    }

    return (dot < c3Point) ? 2 : 0;
}
/// <summary>
/// Encodes a stack of equally sized bitmaps as a 3D tiled ("swizzled") block
/// texture and writes the resulting blocks to a stream.
/// Blocks whose tiled address falls outside the image are left at their default value.
/// </summary>
/// <param name="BitmapList">The slices to encode; the first bitmap determines width and height.</param>
/// <param name="Stream">Destination stream; it is flushed after the blocks are written.</param>
/// <param name="mode">Compression flags forwarded to the block encoder.</param>
/// <param name="ShowWarnings">When true, a warning is written to standard error for every skipped block.</param>
/// <exception cref="InvalidDataException">Width or height is not a multiple of 4.</exception>
public void SaveSwizzled3D(BitmapList BitmapList, Stream Stream, CompressDXT.CompressionMode mode = CompressDXT.CompressionMode.Normal, bool ShowWarnings = false)
{
    int Width = BitmapList.Bitmaps[0].Width, Height = BitmapList.Bitmaps[0].Height, Depth = BitmapList.Bitmaps.Length;

    if ((Width % 4) != 0 || (Height % 4) != 0)
    {
        // Same message as the loading path, so both directions report the problem alike.
        throw (new InvalidDataException(String.Format("Invalid size {0}x{1} must be multiple of 4", Width, Height)));
    }

    BitmapList.LockUnlockBits(PixelFormat.Format32bppArgb, (BitmapDatas) =>
    {
        var Bases = new ARGB_Rev *[Depth];
        for (int n = 0; n < Depth; n++)
        {
            Bases[n] = (ARGB_Rev *)BitmapDatas[n].Scan0.ToPointer();
        }

        int BlockWidth = Width / 4;
        int BlockHeight = Height / 4;

        // The tiled layout may need more blocks than BlockWidth * BlockHeight * Depth.
        int BlockCount = Swizzling.XGAddress3DTiledExtent(BlockWidth, BlockHeight, Depth, BlockSize);

        var CurrentDecodedColors = new ARGB_Rev[4 * 4];
        var Blocks = new TBlock[(uint)BlockCount];

        for (int dxt5_n = 0; dxt5_n < BlockCount; dxt5_n++)
        {
            int TileX, TileY, TileZ;
            Swizzling.XGAddress3DTiledXYZ(dxt5_n, BlockWidth, BlockHeight, BlockSize, out TileX, out TileY, out TileZ);

            int PositionX = TileX * 4;
            int PositionY = TileY * 4;
            int PositionZ = TileZ;

            // Padding blocks of the tiled layout have no source pixels.
            if ((PositionX + 3 >= Width) || (PositionY + 3 >= Height) || (PositionZ >= Depth))
            {
                if (ShowWarnings)
                {
                    Console.Error.WriteLine("(Warning! [Write] Position outside ({0}, {1}, {2}) - ({3}x{4}x{5}))", PositionX, PositionY, PositionZ, Width, Height, Depth);
                }
                continue;
            }

            // Gather the 4x4 pixels of this block in row-major order.
            int n = 0;
            for (int y = 0; y < 4; y++)
            {
                for (int x = 0; x < 4; x++)
                {
                    CurrentDecodedColors[n] = Bases[TileZ][(PositionY + y) * Width + (PositionX + x)];
                    n++;
                }
            }

            EncodeBlock(ref Blocks[dxt5_n], ref CurrentDecodedColors, mode);
        }

        Stream.WriteStructVector(Blocks);
        Stream.Flush();
    });
}
/// <summary>
/// Reads a block-compressed texture from a stream and decodes it into one
/// bitmap per depth slice.
/// </summary>
/// <param name="File">Source stream containing the encoded blocks.</param>
/// <param name="Width">Texture width in pixels; must be a multiple of 4.</param>
/// <param name="Height">Texture height in pixels; must be a multiple of 4.</param>
/// <param name="_Depth">Number of slices; null selects the 2D layout with a single slice.</param>
/// <param name="Swizzled">Whether block addresses use the tiled layout. The linear layout is flagged as not implemented on standard error.</param>
/// <returns>The decoded bitmaps, or an empty list when the stream is too small.</returns>
/// <exception cref="InvalidDataException">Width or height is not a multiple of 4.</exception>
private BitmapList _LoadSwizzled(Stream File, int Width, int Height, int?_Depth, bool Swizzled = true)
{
    if ((Width % 4) != 0 || (Height % 4) != 0)
    {
        throw (new InvalidDataException(String.Format("Invalid size {0}x{1} must be multiple of 4", Width, Height)));
    }

    int Depth = _Depth ?? 1;
    bool Is3D = _Depth.HasValue;

    int BlockWidth = Width / 4;
    int BlockHeight = Height / 4;

    // The tiled layout may need more blocks than BlockWidth * BlockHeight * Depth.
    int BlockCount;
    if (Is3D)
    {
        BlockCount = Swizzling.XGAddress3DTiledExtent(BlockWidth, BlockHeight, Depth, BlockSize);
    }
    else
    {
        BlockCount = Swizzling.XGAddress2DTiledExtent(BlockWidth, BlockHeight, BlockSize);
    }

    // Validate the input before any bitmap is created or locked, so the
    // early return does not leave locked bitmaps behind.
    if (BlockCount * Marshal.SizeOf(typeof(TBlock)) > File.Length)
    {
        Console.Error.WriteLine("File too small");
        return(new BitmapList(0));
    }

    var BitmapList = new BitmapList(Depth);
    for (int n = 0; n < Depth; n++)
    {
        BitmapList.Bitmaps[n] = new Bitmap(Width, Height);
    }

    var BitmapListData = new BitmapData[Depth];
    var BitmapListPointers = new ARGB_Rev *[Depth];
    var CurrentDecodedColors = new ARGB_Rev[4 * 4];

    try
    {
        for (int n = 0; n < Depth; n++)
        {
            var Bitmap = BitmapList.Bitmaps[n];
            BitmapListData[n] = Bitmap.LockBits(Bitmap.GetFullRectangle(), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
            BitmapListPointers[n] = (ARGB_Rev *)BitmapListData[n].Scan0.ToPointer();
        }

        var Blocks = File.ReadStructVector <TBlock>((uint)BlockCount, -1);

        for (int BlockN = 0; BlockN < BlockCount; BlockN++)
        {
            int TileX, TileY, TileZ;

            if (Swizzled)
            {
                if (Is3D)
                {
                    Swizzling.XGAddress3DTiledXYZ(BlockN, BlockWidth, BlockHeight, BlockSize, out TileX, out TileY, out TileZ);
                }
                else
                {
                    Swizzling.XGAddress2DTiledXY(BlockN, BlockWidth, BlockSize, out TileX, out TileY);
                    TileZ = 0;
                }
            }
            else
            {
                TileX = BlockN % BlockWidth;
                TileY = BlockN / BlockWidth;
                TileZ = 0;
                Console.Error.Write("(Not implemented!)");
            }

            // Skip padding blocks of the tiled layout. The slice index is checked
            // too (as the save path does) so it can never index past the bitmap list.
            if (TileX >= BlockWidth || TileY >= BlockHeight || TileZ >= Depth)
            {
                continue;
            }

            DecodeBlock(ref Blocks[BlockN], ref CurrentDecodedColors);

            int PositionX = TileX * 4;
            int PositionY = TileY * 4;
            var BlockBitmap = BitmapList.Bitmaps[TileZ];

            if ((PositionX + 3 >= BlockBitmap.Width) || (PositionY + 3 >= BlockBitmap.Height))
            {
                Console.Error.WriteLine(
                    "(Warning! [Read] Position outside ({0}, {1}) - ({2}x{3}) ;; ({4}, {5})) - ({6}x{7}) ;; {8}",
                    PositionX, PositionY,
                    Width, Height,
                    TileX, TileY,
                    BlockWidth, BlockHeight,
                    BlockN
                    );
                continue;
            }

            // Copy the decoded 4x4 pixels into the slice, row by row.
            int n = 0;
            var BitmapPointer = BitmapListPointers[TileZ];
            for (int y = 0; y < 4; y++)
            {
                int BaseOffset = (PositionY + y) * Width + (PositionX);
                for (int x = 0; x < 4; x++)
                {
                    BitmapPointer[BaseOffset + x] = CurrentDecodedColors[n];
                    n++;
                }
            }
        }
    }
    finally
    {
        // Release every lock that was actually taken, even when reading or decoding failed.
        for (int n = 0; n < Depth; n++)
        {
            if (BitmapListData[n] != null)
            {
                BitmapList.Bitmaps[n].UnlockBits(BitmapListData[n]);
            }
        }
    }

    return(BitmapList);
}