/// <summary>
/// Applies a two-dimensional forward DCT to the 8x8 block in "s" and leaves the result in "d".
/// The 1D transform is run over each of the eight 8-element rows, the block is transposed,
/// the 1D transform is run again, and the block is transposed back.
/// </summary>
/// <param name="s">Source block.</param>
/// <param name="d">Destination block; also used as intermediate storage between the two passes.</param>
/// <param name="temp">Scratch block; its contents are overwritten.</param>
/// <param name="downscaleBy8">When true, each of the 64 values in "d" is multiplied by 0.125 at the end.</param>
/// <param name="offsetSourceByNeg128">When true, the transform reads from the result of s.AddScalarToAllValues(-128f) instead of "s".</param>
internal static void fDCT2D_llm(
    MutableSpan<float> s,
    MutableSpan<float> d,
    MutableSpan<float> temp,
    bool downscaleBy8 = false,
    bool offsetSourceByNeg128 = false)
{
    MutableSpan<float> input = s;
    if (offsetSourceByNeg128)
    {
        input = s.AddScalarToAllValues(-128f);
    }

    // First pass: 1D transform of every row, input -> temp.
    for (int row = 0; row < 8; row++)
    {
        int start = row * 8;
        fDCT1Dllm_32f(input.Slice(start), temp.Slice(start));
    }

    Transpose8x8(temp, d);

    // Second pass: the rows of the transposed block are the original columns, d -> temp.
    for (int row = 0; row < 8; row++)
    {
        int start = row * 8;
        fDCT1Dllm_32f(d.Slice(start), temp.Slice(start));
    }

    Transpose8x8(temp, d);

    if (!downscaleBy8)
    {
        return;
    }

    for (int i = 0; i < 64; i++)
    {
        d[i] *= 0.125f;
    }
}
/// <summary>
/// Forward 8x8 DCT built from the 8x4 kernel: transpose, transform the left and right
/// halves, transpose again, transform both halves again, then multiply all 64 results by 0.125.
/// </summary>
/// <param name="s">Source block.</param>
/// <param name="d">Destination block; also used as intermediate storage.</param>
/// <param name="temp">Scratch block; its contents are overwritten.</param>
public static void fDCT8x8_llm_sse(MutableSpan<float> s, MutableSpan<float> d, MutableSpan<float> temp)
{
    // First pass.
    Transpose8x8(s, temp);
    fDCT2D8x4_32f(temp, d);
    fDCT2D8x4_32f(temp.Slice(4), d.Slice(4));

    // Second pass.
    Transpose8x8(d, temp);
    fDCT2D8x4_32f(temp, d);
    fDCT2D8x4_32f(temp.Slice(4), d.Slice(4));

    Vector4 scale = new Vector4(0.1250f);

    // 16 groups of 4 floats cover the whole 64-element block.
    // NOTE(review): AddOffset is invoked on the parameter "d" itself; whether the caller
    // observes the advanced offset depends on how MutableSpan is declared, which is not
    // visible here - confirm before relying on it.
    for (int group = 0; group < 16; group++)
    {
        Vector4 scaled = _mm_load_ps(d, 0) * scale;
        _mm_store_ps(d, 0, scaled);
        d.AddOffset(4);
    }
}
/// <summary>
/// Writes the four components of "src" (X, Y, Z, W in that order) into four consecutive
/// elements of "dest", beginning "offset" elements in.
/// </summary>
/// <param name="dest">Span that receives the values.</param>
/// <param name="offset">Index, relative to "dest", of the first element written.</param>
/// <param name="src">Vector whose components are stored.</param>
private static void _mm_store_ps(MutableSpan<float> dest, int offset, Vector4 src)
{
    MutableSpan<float> target = dest.Slice(offset);

    target[0] = src.X;
    target[1] = src.Y;
    target[2] = src.Z;
    target[3] = src.W;
}
/// <summary>
/// Original: https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239
/// Applies the inverse DCT to the 8x8 block in "s" and writes the transformed values to "d",
/// using "temp" as a temporary block. Every output value is multiplied by 0.125 at the end.
/// </summary>
/// <param name="s">Source block.</param>
/// <param name="d">Destination block; also used as intermediate storage between the two passes.</param>
/// <param name="temp">Temporary block; its contents are overwritten.</param>
internal static void iDCT2D_llm(MutableSpan<float> s, MutableSpan<float> d, MutableSpan<float> temp)
{
    // First pass: 1D inverse transform of every row, s -> temp.
    for (int row = 0; row < 8; row++)
    {
        int start = row * 8;
        iDCT1Dllm_32f(s.Slice(start), temp.Slice(start));
    }

    Transpose8x8(temp, d);

    // Second pass over the transposed block, d -> temp.
    for (int row = 0; row < 8; row++)
    {
        int start = row * 8;
        iDCT1Dllm_32f(d.Slice(start), temp.Slice(start));
    }

    Transpose8x8(temp, d);

    for (int i = 0; i < 64; i++)
    {
        d[i] *= 0.125f;
    }
}
/// <summary>
/// Checks DCT.FDCT8x4_RightPart against ReferenceImplementations.fDCT2D8x4_32f
/// on random 8x8 data, comparing the two results with an ApproximateFloatComparer(1f).
/// </summary>
/// <param name="seed">Seed passed to Create8x8RandomFloatData.</param>
public void FDCT8x4_RightPart(int seed)
{
    // Arrange: random input in [-200, 200], loaded into a block for the implementation under test.
    var src = Create8x8RandomFloatData(-200, 200, seed);
    var srcBlock = new Block8x8F();
    srcBlock.LoadFrom(src);
    var destBlock = new Block8x8F();

    // Reference result: the right part starts 4 elements into both source and destination.
    var expectedDest = new MutableSpan<float>(64);
    ReferenceImplementations.fDCT2D8x4_32f(src.Slice(4), expectedDest.Slice(4));

    // Act
    DCT.FDCT8x4_RightPart(ref srcBlock, ref destBlock);

    var actualDest = new MutableSpan<float>(64);
    destBlock.CopyTo(actualDest);

    // Assert: expected goes first, actual second, so that a failure message labels the
    // two arrays correctly (the arguments were previously passed the other way round).
    // NOTE(review): assumes ApproximateFloatComparer is symmetric in its two arguments,
    // so the swap does not change which inputs pass - confirm.
    Assert.Equal(expectedDest.Data, actualDest.Data, new ApproximateFloatComparer(1f));
}
/// <summary>
/// Checks DCT.IDCT8x4_RightPart against ReferenceImplementations.iDCT2D8x4_32f
/// on the data from Create8x8FloatData, printing both results before asserting equality.
/// </summary>
public void iDCT2D8x4_RightPart()
{
    // Arrange
    MutableSpan<float> input = Create8x8FloatData();
    MutableSpan<float> expected = new float[64];

    // Reference result: the right part starts 4 elements into both source and destination.
    ReferenceImplementations.iDCT2D8x4_32f(input.Slice(4), expected.Slice(4));

    var inputBlock = new Block8x8F();
    inputBlock.LoadFrom(input);
    var outputBlock = new Block8x8F();

    // Act
    DCT.IDCT8x4_RightPart(ref inputBlock, ref outputBlock);

    float[] actual = new float[64];
    outputBlock.CopyTo(actual);

    // Dump both blocks so a mismatch can be inspected in the test output.
    this.Print8x8Data(expected);
    this.Output.WriteLine("**************");
    this.Print8x8Data(actual);

    // Assert
    Assert.Equal(expected.Data, actual);
}
/// <summary>
/// Reads four consecutive elements of "src", beginning "offset" elements in,
/// and returns them as the X, Y, Z and W components of a Vector4.
/// </summary>
/// <param name="src">Span to read from.</param>
/// <param name="offset">Index, relative to "src", of the first element read.</param>
/// <returns>A vector holding the four values in order.</returns>
private static Vector4 _mm_load_ps(MutableSpan<float> src, int offset)
{
    MutableSpan<float> window = src.Slice(offset);

    float x = window[0];
    float y = window[1];
    float z = window[2];
    float w = window[3];

    return new Vector4(x, y, z, w);
}