private static unsafe int GetEndPointSelectionErrorFast(ReadOnlySpan <uint> tile, int subsetCount, int partition, int w, int h, int maxError) { byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; Span <RgbaColor8> minColors = stackalloc RgbaColor8[subsetCount]; Span <RgbaColor8> maxColors = stackalloc RgbaColor8[subsetCount]; BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); Span <uint> endPoints0 = stackalloc uint[subsetCount]; Span <uint> endPoints1 = stackalloc uint[subsetCount]; SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, uint.MaxValue); Span <RgbaColor32> palette = stackalloc RgbaColor32[8]; int errorSum = 0; for (int subset = 0; subset < subsetCount; subset++) { RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; if (sum != 0) { blockDir = (blockDir << 6) / new RgbaColor32(sum); } uint c0 = endPoints0[subset]; uint c1 = endPoints1[subset]; int pBit0 = GetPBit(c0, 6, 0); int pBit1 = GetPBit(c1, 6, 0); c0 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); c1 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); if (Sse41.IsSupported) { Vector128 <byte> c0Rep = Vector128.Create(c0).AsByte(); Vector128 <byte> c1Rep = Vector128.Create(c1).AsByte(); Vector128 <byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); Vector128 <byte> rWeights; Vector128 <byte> lWeights; fixed(byte *pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) { rWeights = Sse2.LoadScalarVector128((ulong *)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((ulong *)pInvWeights).AsByte(); } Vector128 <byte> iWeights = Sse2.UnpackLow(rWeights, lWeights); Vector128 <byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); Vector128 <byte> iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), 
iWeights.AsInt16()).AsByte(); Vector128 <byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); Vector128 <byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); Vector128 <byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); Vector128 <byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte();
/// <summary>
/// Encodes a tile using a cheap heuristic to choose the mode, index mode and partition,
/// rather than trying every combination.
/// </summary>
/// <param name="tile">Texels of the tile to encode</param>
/// <param name="w">Tile width, forwarded to the helper routines</param>
/// <param name="h">Tile height, forwarded to the helper routines</param>
/// <returns>The block produced by <see cref="Encode"/> for the selected parameters</returns>
private static Block EncodeFast(ReadOnlySpan<uint> tile, int w, int h)
{
    (RgbaColor8 lowest, RgbaColor8 highest) = BC67Utils.GetMinMaxColors(tile, w, h);

    bool fullyOpaque = lowest.A == 255 && highest.A == 255;
    int colorSpread = BC67Utils.SquaredDifference(lowest.GetColor32(), highest.GetColor32());
    bool wideSpread = colorSpread > MinColorVarianceForModeChange;

    int mode;
    int indexSelector = 0;

    if (fullyOpaque)
    {
        // No alpha to preserve: only the distance between min and max decides the mode.
        mode = wideSpread ? 1 : 6;
    }
    else if (lowest.A == highest.A)
    {
        // Alpha is the same on the min and max colors.
        mode = wideSpread ? 7 : 6;
    }
    else if (!wideSpread)
    {
        mode = 5;
    }
    else
    {
        mode = 4;

        // Count how many distinct RGB values and distinct alpha values the tile has.
        // Index mode 1 is picked when there are more distinct RGB values than alpha values.
        Span<uint> seenColors = stackalloc uint[16];
        Span<uint> seenAlphas = stackalloc uint[16];
        int colorCount = 0;
        int alphaCount = 0;

        uint colorBits = new RgbaColor8(255, 255, 255, 0).ToUInt32();
        uint alphaBits = new RgbaColor8(0, 0, 0, 255).ToUInt32();

        foreach (uint texel in tile)
        {
            uint rgb = texel & colorBits;
            uint alpha = texel & alphaBits;

            if (!seenColors[..colorCount].Contains(rgb))
            {
                seenColors[colorCount] = rgb;
                colorCount++;
            }

            if (!seenAlphas[..alphaCount].Contains(alpha))
            {
                seenAlphas[alphaCount] = alpha;
                alphaCount++;
            }
        }

        indexSelector = colorCount > alphaCount ? 1 : 0;
    }

    int partition = 0;

    if (mode == 1 || mode == 7)
    {
        // For these modes, try each candidate partition with a subset count of 2
        // and keep the first one that yields the lowest error.
        int bestError = int.MaxValue;

        foreach (int candidate in _mostFrequentPartitions)
        {
            int candidateError = GetEndPointSelectionErrorFast(tile, 2, candidate, w, h, bestError);

            if (candidateError < bestError)
            {
                bestError = candidateError;
                partition = candidate;
            }
        }
    }

    return Encode(mode, partition, 0, indexSelector, fastMode: true, tile, w, h, out _);
}
/// <summary>
/// Encodes a tile into a block with an explicitly chosen mode, partition, rotation and index mode.
/// </summary>
/// <param name="mode">Index into <c>BC67Tables.BC7ModeInfos</c>; also determines the mode prefix written to the block</param>
/// <param name="partition">Partition number, used to look up the partition and fix-up tables</param>
/// <param name="rotation">Rotation value; selects which component is stored in the alpha slot</param>
/// <param name="indexMode">Index mode value written to the block; non-zero inverts the alpha mask and swaps which swap flag applies</param>
/// <param name="fastMode">Forwarded to the end point selection</param>
/// <param name="tile">Texels of the tile to encode</param>
/// <param name="w">Tile width, forwarded to the helper routines</param>
/// <param name="h">Tile height, forwarded to the helper routines</param>
/// <param name="errorSum">Sum of the errors reported by the index selection passes</param>
/// <returns>The encoded block</returns>
private static Block Encode(
    int mode,
    int partition,
    int rotation,
    int indexMode,
    bool fastMode,
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    out int errorSum)
{
    BC7ModeInfo info = BC67Tables.BC7ModeInfos[mode];

    int subsetCount = info.SubsetCount;
    int colorDepth = info.ColorDepth;
    int alphaDepth = info.AlphaDepth;
    int pBits = info.PBits;
    int colorIndexBits = info.ColorIndexBitCount;
    int alphaIndexBits = info.AlphaIndexBitCount;
    bool hasAlphaIndices = alphaIndexBits != 0;

    // Mask of the component that is handled by the second set of indices (if any).
    uint alphaMask;

    if (!hasAlphaIndices)
    {
        alphaMask = new RgbaColor8(0, 0, 0, 0).ToUInt32();
    }
    else
    {
        alphaMask = rotation switch
        {
            1 => new RgbaColor8(255, 0, 0, 0).ToUInt32(),
            2 => new RgbaColor8(0, 255, 0, 0).ToUInt32(),
            3 => new RgbaColor8(0, 0, 255, 0).ToUInt32(),
            _ => new RgbaColor8(0, 0, 0, 255).ToUInt32()
        };
    }

    if (indexMode != 0)
    {
        alphaMask = ~alphaMask;
    }

    //
    // Select color palette.
    //

    Span<uint> endPoints0 = stackalloc uint[subsetCount];
    Span<uint> endPoints1 = stackalloc uint[subsetCount];

    SelectEndPoints(
        tile,
        w,
        h,
        endPoints0,
        endPoints1,
        subsetCount,
        partition,
        colorIndexBits,
        colorDepth,
        alphaDepth,
        ~alphaMask,
        fastMode);

    if (hasAlphaIndices)
    {
        SelectEndPoints(
            tile,
            w,
            h,
            endPoints0,
            endPoints1,
            subsetCount,
            partition,
            alphaIndexBits,
            colorDepth,
            alphaDepth,
            alphaMask,
            fastMode);
    }

    // P-bits are either one per subset (shared by both end points),
    // or one per end point (even entries for end point 0, odd entries for end point 1).
    Span<int> pBitValues = stackalloc int[pBits];

    if (pBits == subsetCount)
    {
        for (int subset = 0; subset < pBits; subset++)
        {
            pBitValues[subset] = GetPBit(endPoints0[subset], endPoints1[subset], colorDepth, alphaDepth);
        }
    }
    else
    {
        for (int i = 0; i < pBits; i++)
        {
            uint endPoint = (i & 1) == 0 ? endPoints0[i >> 1] : endPoints1[i >> 1];

            pBitValues[i] = GetPBit(endPoint, colorDepth, alphaDepth);
        }
    }

    //
    // Select the indices.
    //

    int colorIndexCount = 1 << colorIndexBits;
    int alphaIndexCount = 1 << alphaIndexBits;

    Span<byte> colorIndices = stackalloc byte[16];
    Span<byte> alphaIndices = stackalloc byte[16];

    errorSum = BC67Utils.SelectIndices(
        tile,
        w,
        h,
        endPoints0,
        endPoints1,
        pBitValues,
        colorIndices,
        subsetCount,
        partition,
        colorIndexBits,
        colorIndexCount,
        colorDepth,
        alphaDepth,
        pBits,
        alphaMask);

    if (hasAlphaIndices)
    {
        errorSum += BC67Utils.SelectIndices(
            tile,
            w,
            h,
            endPoints0,
            endPoints1,
            pBitValues,
            alphaIndices,
            subsetCount,
            partition,
            alphaIndexBits,
            alphaIndexCount,
            colorDepth,
            alphaDepth,
            pBits,
            ~alphaMask);
    }

    // The index at each fix-up position is stored with one bit less, so when it is in the
    // upper half of the range, the end points are swapped and the indices inverted.
    byte[] fixUps = BC67Tables.FixUpIndices[subsetCount - 1][partition];
    int colorSwapThreshold = colorIndexCount >> 1;

    Span<bool> swapColorEndPoints = stackalloc bool[3];

    for (int subset = 0; subset < 3; subset++)
    {
        swapColorEndPoints[subset] = colorIndices[fixUps[subset]] >= colorSwapThreshold;
    }

    bool swapAlphaEndPoints = alphaIndices[0] >= (alphaIndexCount >> 1);

    //
    // Write the block: mode prefix, partition, rotation, index mode,
    // end points (one channel at a time), p-bits and finally the indices.
    //

    var block = new Block();
    int offset = 0;

    block.Encode(1UL << mode, ref offset, mode + 1);
    block.Encode((ulong)partition, ref offset, info.PartitionBitCount);
    block.Encode((ulong)rotation, ref offset, info.RotationBitCount);
    block.Encode((ulong)indexMode, ref offset, info.IndexModeBitCount);

    for (int channel = 0; channel < 4; channel++)
    {
        bool isAlphaChannel = channel == 3;
        int depth;
        int sourceComponent;

        if (isAlphaChannel)
        {
            if (alphaDepth == 0)
            {
                // This mode stores no alpha end points.
                break;
            }

            depth = alphaDepth;
            sourceComponent = (rotation - 1) & 3;
        }
        else
        {
            depth = colorDepth;

            // The color channel selected by the rotation takes its value from component 3.
            sourceComponent = ((channel + 1) & 3) == rotation ? 3 : channel;
        }

        for (int subset = 0; subset < subsetCount; subset++)
        {
            RgbaColor8 low = RgbaColor8.FromUInt32(endPoints0[subset]);
            RgbaColor8 high = RgbaColor8.FromUInt32(endPoints1[subset]);

            int lowPBit = -1;
            int highPBit = -1;

            if (pBits == subsetCount)
            {
                lowPBit = pBitValues[subset];
                highPBit = lowPBit;
            }
            else if (pBits != 0)
            {
                lowPBit = pBitValues[subset * 2];
                highPBit = pBitValues[subset * 2 + 1];
            }

            bool swap;

            if (isAlphaChannel)
            {
                swap = (hasAlphaIndices && indexMode == 0) ? swapAlphaEndPoints : swapColorEndPoints[subset];
            }
            else
            {
                swap = indexMode == 0 ? swapColorEndPoints[subset] : swapAlphaEndPoints;
            }

            RgbaColor8 first = swap ? high : low;
            RgbaColor8 second = swap ? low : high;
            int firstPBit = swap ? highPBit : lowPBit;
            int secondPBit = swap ? lowPBit : highPBit;

            block.Encode(BC67Utils.QuantizeComponent(first.GetComponent(sourceComponent), depth, firstPBit), ref offset, depth);
            block.Encode(BC67Utils.QuantizeComponent(second.GetComponent(sourceComponent), depth, secondPBit), ref offset, depth);
        }
    }

    foreach (int pBitValue in pBitValues)
    {
        block.Encode((ulong)pBitValue, ref offset, 1);
    }

    byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition];
    int colorFlipMask = colorIndexCount - 1;

    for (int texel = 0; texel < 16; texel++)
    {
        int subset = partitionTable[texel];
        byte index = colorIndices[texel];

        if (swapColorEndPoints[subset])
        {
            index = (byte)(index ^ colorFlipMask);
        }

        int bitCount = colorIndexBits;

        if (texel == fixUps[subset])
        {
            bitCount--;
        }

        Debug.Assert(index < (1 << bitCount));

        block.Encode(index, ref offset, bitCount);
    }

    if (hasAlphaIndices)
    {
        int alphaFlipMask = alphaIndexCount - 1;

        for (int texel = 0; texel < 16; texel++)
        {
            byte index = alphaIndices[texel];

            if (swapAlphaEndPoints)
            {
                index = (byte)(index ^ alphaFlipMask);
            }

            // Only the very first alpha index is stored with one bit less.
            int bitCount = alphaIndexBits;

            if (texel == 0)
            {
                bitCount--;
            }

            Debug.Assert(index < (1 << bitCount));

            block.Encode(index, ref offset, bitCount);
        }
    }

    return block;
}
/// <summary>
/// Decodes a single block and writes the resulting texels to the output.
/// </summary>
/// <param name="block">Block to decode</param>
/// <param name="output">Destination texels; the block is written starting at index 0</param>
/// <param name="w">Number of texels to write per row</param>
/// <param name="h">Number of rows to write</param>
/// <param name="width">Distance, in texels, between the start of two consecutive rows of <paramref name="output"/></param>
private static void DecodeBlock(Block block, Span<uint> output, int w, int h, int width)
{
    // The mode is the position of the lowest set bit of the first byte.
    // OR-ing with 0x100 makes an all-zero byte yield 8.
    int mode = BitOperations.TrailingZeroCount((byte)block.Low | 0x100);

    if (mode == 8)
    {
        // Mode is invalid, the spec mandates that hardware fills the block with
        // a transparent black color.
        for (int row = 0; row < h; row++)
        {
            int rowStart = row * width;

            for (int column = 0; column < w; column++)
            {
                output[rowStart + column] = 0;
            }
        }

        return;
    }

    BC7ModeInfo info = BC67Tables.BC7ModeInfos[mode];

    int subsetCount = info.SubsetCount;
    int pBitCount = info.PBits;
    int colorDepth = info.ColorDepth;
    int alphaDepth = info.AlphaDepth;
    int colorIndexBits = info.ColorIndexBitCount;
    int alphaIndexBits = info.AlphaIndexBitCount;
    bool hasAlphaIndices = alphaIndexBits != 0;

    // Header fields follow the mode prefix.
    int offset = mode + 1;

    int partition = (int)block.Decode(ref offset, info.PartitionBitCount);
    int rotation = (int)block.Decode(ref offset, info.RotationBitCount);
    int indexMode = (int)block.Decode(ref offset, info.IndexModeBitCount);

    Debug.Assert(partition < 64);
    Debug.Assert(rotation < 4);
    Debug.Assert(indexMode < 2);

    int endPointCount = subsetCount * 2;

    Span<RgbaColor32> endPoints = stackalloc RgbaColor32[endPointCount];
    Span<byte> pValues = stackalloc byte[pBitCount];

    // Alpha defaults to 255 for the modes that store no alpha end points.
    endPoints.Fill(new RgbaColor32(0, 0, 0, 255));

    // End points are stored one channel at a time: every R, then every G, every B, and A last.
    foreach (ref RgbaColor32 endPoint in endPoints)
    {
        endPoint.R = (int)block.Decode(ref offset, colorDepth);
    }

    foreach (ref RgbaColor32 endPoint in endPoints)
    {
        endPoint.G = (int)block.Decode(ref offset, colorDepth);
    }

    foreach (ref RgbaColor32 endPoint in endPoints)
    {
        endPoint.B = (int)block.Decode(ref offset, colorDepth);
    }

    if (alphaDepth != 0)
    {
        foreach (ref RgbaColor32 endPoint in endPoints)
        {
            endPoint.A = (int)block.Decode(ref offset, alphaDepth);
        }
    }

    for (int i = 0; i < pBitCount; i++)
    {
        pValues[i] = (byte)block.Decode(ref offset, 1);
    }

    for (int i = 0; i < endPointCount; i++)
    {
        // Maps the end point to its p-bit, whether p-bits are per end point or shared per subset.
        // -1 is passed when the mode has no p-bits.
        int pBit = pBitCount != 0 ? pValues[(i * pBitCount) / endPointCount] : -1;

        Unquantize(ref endPoints[i], colorDepth, alphaDepth, pBit);
    }

    byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition];
    byte[] fixUpTable = BC67Tables.FixUpIndices[subsetCount - 1][partition];

    Span<byte> colorIndices = stackalloc byte[16];

    for (int i = 0; i < 16; i++)
    {
        // The index at the fix-up position of each subset is stored with one bit less.
        int bitCount = colorIndexBits;

        if (i == fixUpTable[partitionTable[i]])
        {
            bitCount--;
        }

        colorIndices[i] = (byte)block.Decode(ref offset, bitCount);

        Debug.Assert(colorIndices[i] < 16);
    }

    Span<byte> alphaIndices = stackalloc byte[16];

    if (hasAlphaIndices)
    {
        for (int i = 0; i < 16; i++)
        {
            // Only the first alpha index is stored with one bit less.
            int bitCount = i == 0 ? alphaIndexBits - 1 : alphaIndexBits;

            alphaIndices[i] = (byte)block.Decode(ref offset, bitCount);

            Debug.Assert(alphaIndices[i] < 16);
        }
    }

    for (int ty = 0; ty < h; ty++)
    {
        int rowStart = ty * width;

        for (int tx = 0; tx < w; tx++)
        {
            // Indices inside the block are always laid out with 4 texels per row.
            int texel = ty * 4 + tx;

            int firstEndPoint = partitionTable[texel] * 2;

            RgbaColor32 low = endPoints[firstEndPoint];
            RgbaColor32 high = endPoints[firstEndPoint + 1];

            byte rgbIndex;
            byte alphaIndex;
            int rgbIndexBits;
            int alphaIndexBitCount;

            if (!hasAlphaIndices)
            {
                // A single set of indices drives all components.
                rgbIndex = colorIndices[texel];
                alphaIndex = rgbIndex;
                rgbIndexBits = colorIndexBits;
                alphaIndexBitCount = colorIndexBits;
            }
            else if (indexMode == 0)
            {
                rgbIndex = colorIndices[texel];
                alphaIndex = alphaIndices[texel];
                rgbIndexBits = colorIndexBits;
                alphaIndexBitCount = alphaIndexBits;
            }
            else
            {
                // Index mode 1 exchanges the roles of the two sets of indices.
                rgbIndex = alphaIndices[texel];
                alphaIndex = colorIndices[texel];
                rgbIndexBits = alphaIndexBits;
                alphaIndexBitCount = colorIndexBits;
            }

            RgbaColor32 color = BC67Utils.Interpolate(low, high, rgbIndex, alphaIndex, rgbIndexBits, alphaIndexBitCount);

            // A non-zero rotation exchanges alpha with one of the color components.
            switch (rotation)
            {
                case 1:
                {
                    int alpha = color.A;
                    color.A = color.R;
                    color.R = alpha;
                    break;
                }
                case 2:
                {
                    int alpha = color.A;
                    color.A = color.G;
                    color.G = alpha;
                    break;
                }
                case 3:
                {
                    int alpha = color.A;
                    color.A = color.B;
                    color.B = alpha;
                    break;
                }
            }

            output[rowStart + tx] = color.GetColor8().ToUInt32();
        }
    }
}