private static unsafe int GetEndPointSelectionErrorFast(ReadOnlySpan <uint> tile, int subsetCount, int partition, int w, int h, int maxError) { byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; Span <RgbaColor8> minColors = stackalloc RgbaColor8[subsetCount]; Span <RgbaColor8> maxColors = stackalloc RgbaColor8[subsetCount]; BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); Span <uint> endPoints0 = stackalloc uint[subsetCount]; Span <uint> endPoints1 = stackalloc uint[subsetCount]; SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, uint.MaxValue); Span <RgbaColor32> palette = stackalloc RgbaColor32[8]; int errorSum = 0; for (int subset = 0; subset < subsetCount; subset++) { RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; if (sum != 0) { blockDir = (blockDir << 6) / new RgbaColor32(sum); } uint c0 = endPoints0[subset]; uint c1 = endPoints1[subset]; int pBit0 = GetPBit(c0, 6, 0); int pBit1 = GetPBit(c1, 6, 0); c0 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); c1 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); if (Sse41.IsSupported) { Vector128 <byte> c0Rep = Vector128.Create(c0).AsByte(); Vector128 <byte> c1Rep = Vector128.Create(c1).AsByte(); Vector128 <byte> c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); Vector128 <byte> rWeights; Vector128 <byte> lWeights; fixed(byte *pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) { rWeights = Sse2.LoadScalarVector128((ulong *)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((ulong *)pInvWeights).AsByte(); } Vector128 <byte> iWeights = Sse2.UnpackLow(rWeights, lWeights); Vector128 <byte> iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); Vector128 <byte> iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), 
iWeights.AsInt16()).AsByte(); Vector128 <byte> iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); Vector128 <byte> iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); Vector128 <byte> iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); Vector128 <byte> iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte();
/// <summary>
/// Encodes a tile using a cheap heuristic: the mode is picked from the tile's
/// min/max colors (alpha range and squared color difference) instead of trying
/// every mode, and a partition is only searched for the modes 1 and 7.
/// </summary>
/// <param name="tile">Texels of the tile, one packed color per <see cref="uint"/>.</param>
/// <param name="w">Forwarded to the helpers together with <paramref name="h"/>; presumably the tile width (confirm against callers).</param>
/// <param name="h">Forwarded to the helpers together with <paramref name="w"/>; presumably the tile height (confirm against callers).</param>
/// <returns>The block produced by <c>Encode</c> for the chosen mode, partition and index mode.</returns>
private static Block EncodeFast(ReadOnlySpan<uint> tile, int w, int h)
{
    (RgbaColor8 lowest, RgbaColor8 highest) = BC67Utils.GetMinMaxColors(tile, w, h);

    bool fullyOpaque = lowest.A == 255 && highest.A == 255;
    int colorSpread = BC67Utils.SquaredDifference(lowest.GetColor32(), highest.GetColor32());
    bool wideSpread = colorSpread > MinColorVarianceForModeChange;

    int mode;
    int indexSelector = 0;

    if (fullyOpaque)
    {
        mode = wideSpread ? 1 : 6;
    }
    else if (lowest.A == highest.A)
    {
        // Alpha is the same for min and max, but it is not 255.
        mode = wideSpread ? 7 : 6;
    }
    else if (!wideSpread)
    {
        mode = 5;
    }
    else
    {
        mode = 4;

        // Count the distinct RGB values and the distinct alpha values of the tile.
        // NOTE(review): the scratch buffers hold 16 entries, so this assumes the tile
        // never has more than 16 texels -- confirm against callers.
        Span<uint> seenRgb = stackalloc uint[16];
        Span<uint> seenAlpha = stackalloc uint[16];
        int seenRgbCount = 0;
        int seenAlphaCount = 0;

        uint rgbBits = new RgbaColor8(255, 255, 255, 0).ToUInt32();
        uint alphaBits = new RgbaColor8(0, 0, 0, 255).ToUInt32();

        foreach (uint texel in tile)
        {
            uint rgbPart = texel & rgbBits;
            uint alphaPart = texel & alphaBits;

            if (!seenRgb.Slice(0, seenRgbCount).Contains(rgbPart))
            {
                seenRgb[seenRgbCount] = rgbPart;
                seenRgbCount++;
            }

            if (!seenAlpha.Slice(0, seenAlphaCount).Contains(alphaPart))
            {
                seenAlpha[seenAlphaCount] = alphaPart;
                seenAlphaCount++;
            }
        }

        // Index mode 1 is requested when there are more distinct RGB values than alpha values.
        indexSelector = seenRgbCount > seenAlphaCount ? 1 : 0;
    }

    int bestPartition = 0;
    bool needsPartitionSearch = mode == 1 || mode == 7;

    if (needsPartitionSearch)
    {
        int bestError = int.MaxValue;

        // Try each candidate partition with 2 subsets and keep the one with the lowest error.
        // The best error so far is handed to the estimator as its maxError argument.
        for (int candidate = 0; candidate < _mostFrequentPartitions.Length; candidate++)
        {
            int partition = _mostFrequentPartitions[candidate];
            int partitionError = GetEndPointSelectionErrorFast(tile, 2, partition, w, h, bestError);

            if (partitionError < bestError)
            {
                bestError = partitionError;
                bestPartition = partition;
            }
        }
    }

    return Encode(mode, bestPartition, 0, indexSelector, fastMode: true, tile, w, h, out _);
}