private static unsafe int GetEndPointSelectionErrorFast(ReadOnlySpan <uint> tile, int subsetCount, int partition, int w, int h, int maxError) { byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; Span <RgbaColor8> minColors = stackalloc RgbaColor8[subsetCount]; Span <RgbaColor8> maxColors = stackalloc RgbaColor8[subsetCount]; BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); Span <uint> endPoints0 = stackalloc uint[subsetCount]; Span <uint> endPoints1 = stackalloc uint[subsetCount]; SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, uint.MaxValue); Span <RgbaColor32> palette = stackalloc RgbaColor32[8]; int errorSum = 0; for (int subset = 0; subset < subsetCount; subset++) { RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; if (sum != 0) { blockDir = (blockDir << 6) / new RgbaColor32(sum); } uint c0 = endPoints0[subset]; uint c1 = endPoints1[subset]; int pBit0 = GetPBit(c0, 6, 0); int pBit1 = GetPBit(c1, 6, 0); c0 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); c1 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); if (Sse41.IsSupported) { Vector128 <byte> c0Rep = Vector128.Create(c0).AsByte(); Vector128 <byte> c1Rep = Vector128.Create(c1).AsByte(); Vector128 <byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); Vector128 <byte> rWeights; Vector128 <byte> lWeights; fixed(byte *pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) { rWeights = Sse2.LoadScalarVector128((ulong *)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((ulong *)pInvWeights).AsByte(); } Vector128 <byte> iWeights = Sse2.UnpackLow(rWeights, lWeights); Vector128 <byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); Vector128 <byte> iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), 
iWeights.AsInt16()).AsByte(); Vector128 <byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); Vector128 <byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); Vector128 <byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); Vector128 <byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte();
/// <summary>
/// Builds a <see cref="Block"/> for one tile using the given BC7 mode, partition, rotation and index mode.
/// </summary>
/// <remarks>
/// Fields are written in this order: mode marker, partition, rotation, index mode, the three colour
/// components of every subset's end points, the alpha component (when the mode has an alpha depth),
/// the P-bits, the colour indices and finally the separate alpha indices (when the mode has them).
/// </remarks>
/// <param name="mode">Index into <c>BC67Tables.BC7ModeInfos</c>; also determines the mode marker bits.</param>
/// <param name="partition">Partition index used for the fix-up and partition table lookups and written to the block.</param>
/// <param name="rotation">Rotation value; selects the masked channel and which component is stored as alpha.</param>
/// <param name="indexMode">Index mode value; when non-zero the channel mask is inverted and the swap flags trade roles.</param>
/// <param name="fastMode">Forwarded unchanged to <c>SelectEndPoints</c>.</param>
/// <param name="tile">Source texels, forwarded to end point and index selection (presumably packed RGBA8 - confirm against callers).</param>
/// <param name="w">Forwarded with <paramref name="tile"/> (presumably the tile width - confirm against callers).</param>
/// <param name="h">Forwarded with <paramref name="tile"/> (presumably the tile height - confirm against callers).</param>
/// <param name="errorSum">Sum of the values returned by the <c>BC67Utils.SelectIndices</c> calls.</param>
/// <returns>The packed block.</returns>
private static Block Encode(
    int mode,
    int partition,
    int rotation,
    int indexMode,
    bool fastMode,
    ReadOnlySpan<uint> tile,
    int w,
    int h,
    out int errorSum)
{
    BC7ModeInfo info = BC67Tables.BC7ModeInfos[mode];

    int subsetCount = info.SubsetCount;
    int partitionBitCount = info.PartitionBitCount;
    int rotationBitCount = info.RotationBitCount;
    int indexModeBitCount = info.IndexModeBitCount;
    int colorDepth = info.ColorDepth;
    int alphaDepth = info.AlphaDepth;
    int pBits = info.PBits;
    int colorIndexBitCount = info.ColorIndexBitCount;
    int alphaIndexBitCount = info.AlphaIndexBitCount;
    bool separateAlphaIndices = alphaIndexBitCount != 0;

    // With separate alpha indices the rotation picks the single channel covered by the mask;
    // without them the mask is empty.
    uint alphaMask = separateAlphaIndices
        ? (rotation switch
        {
            1 => new RgbaColor8(255, 0, 0, 0).ToUInt32(),
            2 => new RgbaColor8(0, 255, 0, 0).ToUInt32(),
            3 => new RgbaColor8(0, 0, 255, 0).ToUInt32(),
            _ => new RgbaColor8(0, 0, 0, 255).ToUInt32(),
        })
        : new RgbaColor8(0, 0, 0, 0).ToUInt32();

    if (indexMode != 0)
    {
        alphaMask = ~alphaMask;
    }

    uint invertedAlphaMask = ~alphaMask;

    // End point selection: first pass with the colour index width, optional second pass with the alpha index width.
    Span<uint> endPoints0 = stackalloc uint[subsetCount];
    Span<uint> endPoints1 = stackalloc uint[subsetCount];

    SelectEndPoints(
        tile,
        w,
        h,
        endPoints0,
        endPoints1,
        subsetCount,
        partition,
        colorIndexBitCount,
        colorDepth,
        alphaDepth,
        invertedAlphaMask,
        fastMode);

    if (separateAlphaIndices)
    {
        SelectEndPoints(
            tile,
            w,
            h,
            endPoints0,
            endPoints1,
            subsetCount,
            partition,
            alphaIndexBitCount,
            colorDepth,
            alphaDepth,
            alphaMask,
            fastMode);
    }

    // P-bits: either one per subset (shared by both end points) or one per end point.
    Span<int> pBitValues = stackalloc int[pBits];
    bool sharedPBit = pBits == subsetCount;

    if (sharedPBit)
    {
        for (int subset = 0; subset < pBits; subset++)
        {
            pBitValues[subset] = GetPBit(endPoints0[subset], endPoints1[subset], colorDepth, alphaDepth);
        }
    }
    else
    {
        for (int i = 0; i < pBits; i++)
        {
            int subset = i >> 1;
            uint endPoint = (i & 1) != 0 ? endPoints1[subset] : endPoints0[subset];

            pBitValues[i] = GetPBit(endPoint, colorDepth, alphaDepth);
        }
    }

    // Index selection; errorSum accumulates what SelectIndices reports for each pass.
    int colorIndexCount = 1 << colorIndexBitCount;
    int alphaIndexCount = 1 << alphaIndexBitCount;

    Span<byte> colorIndices = stackalloc byte[16];
    Span<byte> alphaIndices = stackalloc byte[16];

    errorSum = BC67Utils.SelectIndices(
        tile,
        w,
        h,
        endPoints0,
        endPoints1,
        pBitValues,
        colorIndices,
        subsetCount,
        partition,
        colorIndexBitCount,
        colorIndexCount,
        colorDepth,
        alphaDepth,
        pBits,
        alphaMask);

    if (separateAlphaIndices)
    {
        errorSum += BC67Utils.SelectIndices(
            tile,
            w,
            h,
            endPoints0,
            endPoints1,
            pBitValues,
            alphaIndices,
            subsetCount,
            partition,
            alphaIndexBitCount,
            alphaIndexCount,
            colorDepth,
            alphaDepth,
            pBits,
            invertedAlphaMask);
    }

    // A subset is flagged for swapping when the index at its fix-up position falls in the upper half
    // of the index range; swapping the end points and complementing the indices then brings that
    // index below the half-way point, so it can be stored with one bit less.
    byte[] fixUpTable = BC67Tables.FixUpIndices[subsetCount - 1][partition];
    int colorIndexHalf = colorIndexCount >> 1;

    Span<bool> colorSwapSubset = stackalloc bool[3];

    for (int subset = 0; subset < 3; subset++)
    {
        colorSwapSubset[subset] = colorIndices[fixUpTable[subset]] >= colorIndexHalf;
    }

    bool alphaSwapSubset = alphaIndices[0] >= (alphaIndexCount >> 1);

    // Header fields.
    Block block = new Block();
    int offset = 0;

    block.Encode(1UL << mode, ref offset, mode + 1);
    block.Encode((ulong)partition, ref offset, partitionBitCount);
    block.Encode((ulong)rotation, ref offset, rotationBitCount);
    block.Encode((ulong)indexMode, ref offset, indexModeBitCount);

    // Colour end points, component by component. The component matching the rotation is replaced by component 3.
    for (int comp = 0; comp < 3; comp++)
    {
        int sourceComp = ((comp + 1) & 3) == rotation ? 3 : comp;

        for (int subset = 0; subset < subsetCount; subset++)
        {
            ReadPBits(pBitValues, sharedPBit, subset, out int pBit0, out int pBit1);

            bool swapped = indexMode == 0 ? colorSwapSubset[subset] : alphaSwapSubset;

            WriteEndPointPair(
                ref block,
                ref offset,
                endPoints0[subset],
                endPoints1[subset],
                sourceComp,
                colorDepth,
                pBit0,
                pBit1,
                swapped);
        }
    }

    // Alpha end points, taken from the component selected by the rotation.
    if (alphaDepth != 0)
    {
        int alphaComp = (rotation - 1) & 3;

        for (int subset = 0; subset < subsetCount; subset++)
        {
            ReadPBits(pBitValues, sharedPBit, subset, out int pBit0, out int pBit1);

            bool swapped = (separateAlphaIndices && indexMode == 0) ? alphaSwapSubset : colorSwapSubset[subset];

            WriteEndPointPair(
                ref block,
                ref offset,
                endPoints0[subset],
                endPoints1[subset],
                alphaComp,
                alphaDepth,
                pBit0,
                pBit1,
                swapped);
        }
    }

    // P-bits, one bit each.
    for (int i = 0; i < pBits; i++)
    {
        block.Encode((ulong)pBitValues[i], ref offset, 1);
    }

    // Colour indices; the entry at each subset's fix-up position is written with one bit less.
    byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition];
    int colorIndexFlip = colorIndexCount - 1;

    for (int texel = 0; texel < 16; texel++)
    {
        int subset = partitionTable[texel];
        byte index = colorIndices[texel];

        if (colorSwapSubset[subset])
        {
            index = (byte)(index ^ colorIndexFlip);
        }

        int bitCount = colorIndexBitCount;

        if (texel == fixUpTable[subset])
        {
            bitCount--;
        }

        Debug.Assert(index < (1 << bitCount));

        block.Encode(index, ref offset, bitCount);
    }

    // Separate alpha indices; the first entry is written with one bit less.
    if (separateAlphaIndices)
    {
        int alphaIndexFlip = alphaIndexCount - 1;

        for (int texel = 0; texel < 16; texel++)
        {
            byte index = alphaIndices[texel];

            if (alphaSwapSubset)
            {
                index = (byte)(index ^ alphaIndexFlip);
            }

            int bitCount = texel == 0 ? alphaIndexBitCount - 1 : alphaIndexBitCount;

            Debug.Assert(index < (1 << bitCount));

            block.Encode(index, ref offset, bitCount);
        }
    }

    return block;

    // Fetches the P-bit pair for a subset: the shared value, the per-end-point values, or -1/-1 when there are no P-bits.
    static void ReadPBits(ReadOnlySpan<int> values, bool shared, int subset, out int pBit0, out int pBit1)
    {
        if (shared)
        {
            pBit0 = values[subset];
            pBit1 = pBit0;
        }
        else if (values.Length != 0)
        {
            pBit0 = values[subset * 2];
            pBit1 = values[subset * 2 + 1];
        }
        else
        {
            pBit0 = -1;
            pBit1 = -1;
        }
    }

    // Quantizes one component of both end points and appends them to the block, second end point first when swapped.
    static void WriteEndPointPair(
        ref Block target,
        ref int bitOffset,
        uint endPoint0,
        uint endPoint1,
        int component,
        int depth,
        int pBit0,
        int pBit1,
        bool swapped)
    {
        RgbaColor8 first = RgbaColor8.FromUInt32(swapped ? endPoint1 : endPoint0);
        RgbaColor8 second = RgbaColor8.FromUInt32(swapped ? endPoint0 : endPoint1);
        int firstPBit = swapped ? pBit1 : pBit0;
        int secondPBit = swapped ? pBit0 : pBit1;

        target.Encode(BC67Utils.QuantizeComponent(first.GetComponent(component), depth, firstPBit), ref bitOffset, depth);
        target.Encode(BC67Utils.QuantizeComponent(second.GetComponent(component), depth, secondPBit), ref bitOffset, depth);
    }
}