/// <summary>
/// Checks an aligned and an unaligned SSE load/subtract/store sequence against the
/// floats that start at <c>s.f0</c>.
/// </summary>
/// <param name="s">Struct whose <c>f0</c> field is pinned and used as scratch memory.</param>
/// <param name="v">Vector used as the left operand of both subtractions.</param>
/// <param name="offset">Extra float offset added to the address of each subtracted load.</param>
/// <returns>
/// 100 when every check passes (or when SSE2 is not supported); -1 when a check
/// fails or an exception is thrown.
/// </returns>
/// <remarks>
/// NOTE(review): the expected vectors only work out if the struct starts zeroed, the
/// vector is all ones and <paramref name="offset"/> is 0 - confirm against the caller.
/// </remarks>
static unsafe int Test(ref S s, Vector128<float> v, int offset)
{
    // Nothing to exercise without SSE2; report the success value.
    if (!Sse2.IsSupported)
    {
        return 100;
    }

    int result = 100;

    fixed (float* basePtr = &s.f0)
    {
        // Number of floats to skip to reach a 16-byte boundary. This yields 1..4:
        // a pointer that is already aligned is advanced by a full 16 bytes.
        int misalignedBytes = (int)basePtr & 0xc;
        int floatsToAlign = (0x10 - misalignedBytes) >> 2;
        float* aligned = basePtr + floatsToAlign;

        try
        {
            // Aligned case: subtract the vector loaded from the next 16-byte slot
            // (shifted by 'offset') and store only the low lane at aligned + 2.
            // The load is intentionally left nested inside Subtract, as in the
            // original expression.
            Sse2.StoreScalar(aligned + 2, Sse2.Subtract(v, Sse2.LoadAlignedVector128(aligned + offset + 4)));

            // Read the aligned slot back; the expected content is {0, 0, 1, 0}.
            byte* alignedBytes = (byte*)aligned;
            Vector128<float> observed = Sse41.IsSupported
                ? Sse41.LoadAlignedVector128NonTemporal(alignedBytes).AsSingle()
                : Sse2.LoadVector128(alignedBytes).AsSingle();

            Vector128<float> expectedAligned = Vector128.Create(0.0F, 0.0F, 1.0F, 0.0F);
            if (!observed.Equals(expectedAligned))
            {
                Console.WriteLine("Aligned case FAILED: v2 = " + observed);
                result = -1;
            }

            // Unaligned case: the subtracted vector starts one float past the aligned
            // slot, i.e. one element before the scalar stored above, so the intended
            // arithmetic is {1, 1, 1, 1} - {0, 1, 0, 0} = {1, 0, 1, 1}.
            float* unaligned = aligned + 1;
            Sse2.Store(unaligned, Sse2.Subtract(v, Sse2.LoadVector128(aligned + offset + 1)));

            // Read the same unaligned location back and compare.
            observed = Sse2.LoadVector128(unaligned);

            Vector128<float> expectedUnaligned = Vector128.Create(1.0F, 0.0F, 1.0F, 1.0F);
            if (!observed.Equals(expectedUnaligned))
            {
                Console.WriteLine("Unaligned case FAILED: v2 = " + observed);
                result = -1;
            }
        }
        catch (Exception ex)
        {
            // Any exception counts as a failure of the test.
            Console.WriteLine("Unexpected exception: " + ex.Message);
            result = -1;
        }
    }

    return result;
}
/// <summary>
/// Thin wrapper that forwards to <c>Sse41.LoadAlignedVector128NonTemporal</c> for
/// <see cref="ulong"/> elements.
/// </summary>
/// <param name="address">Pointer handed unchanged to the intrinsic.</param>
/// <returns>The 128-bit vector produced by the intrinsic.</returns>
public static unsafe Vector128<ulong> _mm_stream_load_si128(ulong* address)
    => Sse41.LoadAlignedVector128NonTemporal(address);
/// <summary>
/// Runs <c>Sse41.LoadAlignedVector128NonTemporal</c> once per integer element type
/// (int, long, uint, ulong, short, ushort, sbyte, byte) and checks that the loaded
/// vector, written back to memory, matches the source element by element.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <returns>
/// <c>Pass</c> when every comparison matches or SSE4.1 is not supported;
/// <c>Fail</c> when any element differs.
/// </returns>
static unsafe int Main(string[] args)
{
    // Nothing to verify on hardware without SSE4.1.
    if (!Sse41.IsSupported)
    {
        return Pass;
    }

    // Compares 'count' elements. On the first difference it prints the header and
    // the whole 'actual' buffer, then returns true; returns false when all match.
    bool ReportIfDifferent<T>(T* expected, T* actual, int count, string failureHeader)
        where T : unmanaged, IEquatable<T>
    {
        for (int i = 0; i < count; i++)
        {
            if (expected[i].Equals(actual[i]))
            {
                continue;
            }

            Console.WriteLine(failureHeader);
            for (int n = 0; n < count; n++)
            {
                Console.Write(actual[n].ToString() + ", ");
            }
            Console.WriteLine();
            return true;
        }

        return false;
    }

    int outcome = Pass;

    // Each section: take a 32-byte scratch buffer, align a pointer inside it via
    // Align(..., 16), load through the typed overload, write the vector out,
    // and compare the two buffers.

    // int
    {
        byte* scratch = stackalloc byte[32];
        int* source = (int*)Align(scratch, 16);
        int* copy = stackalloc int[4];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 4, "Sse41 LoadAlignedVector128NonTemporal failed on int:"))
        {
            outcome = Fail;
        }
    }

    // long
    {
        byte* scratch = stackalloc byte[32];
        long* source = (long*)Align(scratch, 16);
        long* copy = stackalloc long[2];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 2, "Sse41 LoadAlignedVector128NonTemporal failed on long:"))
        {
            outcome = Fail;
        }
    }

    // uint
    {
        byte* scratch = stackalloc byte[32];
        uint* source = (uint*)Align(scratch, 16);
        uint* copy = stackalloc uint[4];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 4, "Sse41 LoadAlignedVector128NonTemporal failed on uint:"))
        {
            outcome = Fail;
        }
    }

    // ulong
    {
        byte* scratch = stackalloc byte[32];
        ulong* source = (ulong*)Align(scratch, 16);
        ulong* copy = stackalloc ulong[2];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 2, "Sse41 LoadAlignedVector128NonTemporal failed on ulong:"))
        {
            outcome = Fail;
        }
    }

    // short
    {
        byte* scratch = stackalloc byte[32];
        short* source = (short*)Align(scratch, 16);
        short* copy = stackalloc short[8];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 8, "Sse41 LoadAlignedVector128NonTemporal failed on short:"))
        {
            outcome = Fail;
        }
    }

    // ushort
    {
        byte* scratch = stackalloc byte[32];
        ushort* source = (ushort*)Align(scratch, 16);
        ushort* copy = stackalloc ushort[8];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 8, "Sse41 LoadAlignedVector128NonTemporal failed on ushort:"))
        {
            outcome = Fail;
        }
    }

    // sbyte
    {
        byte* scratch = stackalloc byte[32];
        sbyte* source = (sbyte*)Align(scratch, 16);
        sbyte* copy = stackalloc sbyte[16];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 16, "Sse41 LoadAlignedVector128NonTemporal failed on sbyte:"))
        {
            outcome = Fail;
        }
    }

    // byte
    {
        byte* scratch = stackalloc byte[32];
        byte* source = (byte*)Align(scratch, 16);
        byte* copy = stackalloc byte[16];
        Unsafe.Write(copy, Sse41.LoadAlignedVector128NonTemporal(source));
        if (ReportIfDifferent(source, copy, 16, "Sse41 LoadAlignedVector128NonTemporal failed on byte:"))
        {
            outcome = Fail;
        }
    }

    return outcome;
}