/// <summary>
            /// Runs a two-pass transform over a 64-element block laid out as 8 rows of 8 floats:
            /// <c>FDCT1Dllm_32f</c> is applied to every row, the block is transposed with
            /// <c>Transpose8x8</c>, and then the same row pass + transpose is repeated so that
            /// the final result ends up in <paramref name="d"/>.
            /// </summary>
            /// <param name="s">Source block; read at offsets 0, 8, ..., 56.</param>
            /// <param name="d">Destination block; receives the final result (64 values are touched when downscaling).</param>
            /// <param name="temp">Scratch buffer that holds the output of each row pass before it is transposed into <paramref name="d"/>.</param>
            /// <param name="downscaleBy8">When <c>true</c>, each of the 64 values in <paramref name="d"/> is multiplied by 0.125 at the end.</param>
            /// <param name="subtract128FromSource">
            /// When <c>true</c>, the row pass reads from <c>s.AddScalarToAllValues(-128f)</c> instead of <paramref name="s"/> itself.
            /// NOTE(review): whether that helper copies or mutates <paramref name="s"/> is not visible here - confirm.
            /// </param>
            internal static void FDCT2D_llm(
                Span <float> s,
                Span <float> d,
                Span <float> temp,
                bool downscaleBy8          = false,
                bool subtract128FromSource = false)
            {
                // Pick the buffer the first pass reads from.
                Span <float> input;
                if (subtract128FromSource)
                {
                    input = s.AddScalarToAllValues(-128f);
                }
                else
                {
                    input = s;
                }

                // First pass: transform each row of the input into the scratch buffer.
                for (int rowStart = 0; rowStart < 64; rowStart += 8)
                {
                    FDCT1Dllm_32f(input.Slice(rowStart), temp.Slice(rowStart));
                }

                Transpose8x8(temp, d);

                // Second pass: the former columns are now rows of d.
                for (int rowStart = 0; rowStart < 64; rowStart += 8)
                {
                    FDCT1Dllm_32f(d.Slice(rowStart), temp.Slice(rowStart));
                }

                // Transpose back so d is in the original orientation.
                Transpose8x8(temp, d);

                if (!downscaleBy8)
                {
                    return;
                }

                for (int i = 0; i < 64; i++)
                {
                    d[i] = d[i] * 0.125f;
                }
            }
            // Round-trip check for the reference integer DCT:
            //   1. generate 64 integers via Create8x8RandomIntData(-200, 200, seed),
            //   2. add 128 to every value, then run Subtract128_TransformFDCT_Upscale8_Inplace,
            //   3. integer-divide every coefficient by 8,
            //   4. run TransformIDCTInplace,
            //   5. compare elements [startAt, 64) against the generated data using
            //      ApproximateFloatComparer(3f).
            public void ForwardThenInverse(int seed, int startAt)
            {
                Span <int> source = Create8x8RandomIntData(-200, 200, seed);

                // The forward routine subtracts 128 itself (per its name), so feed it shifted data.
                Span <int> data = source.AddScalarToAllValues(128);

                ReferenceImplementations.StandardIntegerDCT.Subtract128_TransformFDCT_Upscale8_Inplace(data);

                // Integer division of every coefficient by 8 before the inverse transform.
                int k = 0;
                while (k < 64)
                {
                    data[k] = data[k] / 8;
                    k++;
                }

                ReferenceImplementations.StandardIntegerDCT.TransformIDCTInplace(data);

                for (int idx = startAt; idx < 64; idx++)
                {
                    float want = source[idx];
                    float got  = data[idx];

                    Assert.Equal(want, got, new ApproximateFloatComparer(3f));
                }
            }