示例#1
0
 /// <summary>
 /// Builds the machine code of a routine that probes whether the <c>cpuid</c> instruction is available
 /// and wraps it into a <see cref="NativeMethodHandle{TDelegate}"/>.
 /// </summary>
 /// <remarks>
 /// The emitted routine flips bit 21 (mask <c>200000h</c>, the ID flag) of the flags register, reads the
 /// flags back, XORs the result with the original value and shifts it right by 21. The original flags are
 /// restored before the routine returns. The 64-bit or 32-bit encoding is selected through
 /// <see cref="Environment.Is64BitProcess"/>.
 /// </remarks>
 /// <returns>Method handle wrapping the cpuid support check.</returns>
 /// <exception cref="PlatformNotSupportedException">
 /// Raised through <c>ThrowPlatformNotSupportedException</c> when <c>IsSupportedArchitecture()</c> returns
 /// false, i.e. the processor architecture is neither x64 nor x86.
 /// </exception>
 public static NativeMethodHandle<IsCpuIdSupportedDelegate> CreateIsCpuIdSupportedMethodHandle()
 {
     if (!IsSupportedArchitecture())
     {
         ThrowPlatformNotSupportedException("cpuid support checking function is not supported on this architecture");
     }

     byte[] machineCode;
     if (Environment.Is64BitProcess)
     {
         // bool __stdcall isCpuIdSupported(void);
         // The value is returned in rax, so the native return type may be a 64-bit integer (long long int).
         machineCode = new byte[]
         {
             0x9c,                                    // pushfq               ; saved copy, restored by the last popfq
             0x9c,                                    // pushfq
             0x58,                                    // pop     rax          ; rax = current flags
             0x48, 0x89, 0xc1,                        // mov     rcx,rax      ; remember the original flags
             0x48, 0x35, 0x00, 0x00, 0x20, 0x00,      // xor     rax,200000h  ; toggle bit 21
             0x50,                                    // push    rax
             0x9d,                                    // popfq                ; try to write the toggled flags
             0x9c,                                    // pushfq
             0x58,                                    // pop     rax          ; read the flags back
             0x48, 0x31, 0xc8,                        // xor     rax,rcx      ; bits that actually changed
             0x48, 0xc1, 0xe8, 0x15,                  // shr     rax,21       ; move bit 21 down to bit 0
             0x9d,                                    // popfq                ; restore the original flags
             0xc3                                     // ret
         };
     }
     else
     {
         // bool __stdcall isCpuIdSupported(void);
         // The value is returned in eax, so the native return type may be a 32-bit integer (int).
         machineCode = new byte[]
         {
             0x9c,                              // pushf               ; saved copy, restored by the last popf
             0x9c,                              // pushf
             0x58,                              // pop    eax          ; eax = current flags
             0x89, 0xc1,                        // mov    ecx,eax      ; remember the original flags
             0x35, 0x00, 0x00, 0x20, 0x00,      // xor    eax,200000h  ; toggle bit 21
             0x50,                              // push   eax
             0x9d,                              // popf                ; try to write the toggled flags
             0x9c,                              // pushf
             0x58,                              // pop    eax          ; read the flags back
             0x31, 0xc8,                        // xor    eax,ecx      ; bits that actually changed
             0xc1, 0xe8, 0x15,                  // shr    eax,21       ; move bit 21 down to bit 0
             0x9d,                              // popf                ; restore the original flags
             0xc3                               // ret
         };
     }

     return NativeMethodHandle.Create<IsCpuIdSupportedDelegate>(machineCode);
 }
示例#2
0
 /// <summary>
 /// Builds the machine code of a thin <c>cpuid</c> wrapper and exposes it as a
 /// <see cref="NativeMethodHandle{TDelegate}"/>.
 /// </summary>
 /// <remarks>
 /// Native signature: <c>void __stdcall cpuid(int* cpuInfo, int eax, int ecx)</c>. The routine loads
 /// <c>eax</c> and <c>ecx</c> from its second and third argument, executes <c>cpuid</c> and stores
 /// eax, ebx, ecx and edx (in that order) as four consecutive 32-bit values starting at <c>cpuInfo</c>.
 /// The 64-bit or 32-bit encoding is selected through <see cref="Environment.Is64BitProcess"/>.
 /// NOTE(review): the 64-bit encoding reads its arguments from rcx, edx and r8d; confirm that this matches
 /// the native calling convention on every x64 platform this handle is created on.
 /// </remarks>
 /// <typeparam name="TDelegate">Delegate type used to invoke the native cpuid routine.</typeparam>
 /// <returns>Method handle of the cpuid routine.</returns>
 /// <exception cref="PlatformNotSupportedException">
 /// Raised through <c>ThrowPlatformNotSupportedException</c> when <c>IsSupportedArchitecture()</c> returns
 /// false, i.e. the processor architecture is neither x64 nor x86.
 /// </exception>
 private static NativeMethodHandle<TDelegate> CreateCpuIdMethodHandle<TDelegate>()
     where TDelegate : Delegate
 {
     if (!IsSupportedArchitecture())
     {
         ThrowPlatformNotSupportedException("cpuid is not supported on this architecture");
     }

     byte[] machineCode;
     if (Environment.Is64BitProcess)
     {
         // void __stdcall cpuid(int* cpuInfo, int eax, int ecx);
         machineCode = new byte[]
         {
             0x53,                        // push   rbx                       ; rbx is overwritten by cpuid, so preserve it
             0x49, 0x89, 0xc9,            // mov    r9,rcx                    ; r9 = cpuInfo (rcx is overwritten below)
             0x89, 0xd0,                  // mov    eax,edx                   ; eax argument
             0x44, 0x89, 0xc1,            // mov    ecx,r8d                   ; ecx argument
             0x0f, 0xa2,                  // cpuid
             0x41, 0x89, 0x01,            // mov    dword ptr [r9],eax        ; cpuInfo[0] = eax
             0x41, 0x89, 0x59, 0x04,      // mov    dword ptr [r9 + 04h],ebx  ; cpuInfo[1] = ebx
             0x41, 0x89, 0x49, 0x08,      // mov    dword ptr [r9 + 08h],ecx  ; cpuInfo[2] = ecx
             0x41, 0x89, 0x51, 0x0c,      // mov    dword ptr [r9 + 0ch],edx  ; cpuInfo[3] = edx
             0x5b,                        // pop    rbx
             0xc3                         // ret
         };
     }
     else
     {
         // void __stdcall cpuid(int* cpuInfo, int eax, int ecx);
         machineCode = new byte[]
         {
             0x56,                        // push   esi
             0x53,                        // push   ebx
             0x8b, 0x74, 0x24, 0x0c,      // mov    esi,dword ptr [esp + 0Ch]  ; esi = cpuInfo (two pushes + return address)
             0x8b, 0x44, 0x24, 0x10,      // mov    eax,dword ptr [esp + 10h]  ; eax argument
             0x8b, 0x4c, 0x24, 0x14,      // mov    ecx,dword ptr [esp + 14h]  ; ecx argument
             0x0f, 0xa2,                  // cpuid
             0x89, 0x06,                  // mov    dword ptr [esi],eax        ; cpuInfo[0] = eax
             0x89, 0x5e, 0x04,            // mov    dword ptr [esi + 04h],ebx  ; cpuInfo[1] = ebx
             0x89, 0x4e, 0x08,            // mov    dword ptr [esi + 08h],ecx  ; cpuInfo[2] = ecx
             0x89, 0x56, 0x0c,            // mov    dword ptr [esi + 0Ch],edx  ; cpuInfo[3] = edx
             0x5b,                        // pop    ebx
             0x5e,                        // pop    esi
             0xc2, 0x0c, 0x00             // ret    0Ch                        ; callee pops the 12 bytes of arguments
         };
     }

     return NativeMethodHandle.Create<TDelegate>(machineCode);
 }
示例#3
0
 /// <summary>
 /// Builds the machine code of an AVX2-based memory comparison routine and wraps it into a
 /// <see cref="NativeMethodHandle{TDelegate}"/>.
 /// </summary>
 /// <remarks>
 /// The routine takes two buffer pointers and a byte count. It first compares 32 bytes per iteration
 /// (<c>vpcmpeqb</c> + <c>vpmovmskb</c>) over the part of the length that is a multiple of 32, then compares
 /// the remaining bytes one at a time. It returns 1 in eax when no difference was found and 0 as soon as
 /// a mismatch is detected. The 64-bit or 32-bit encoding is selected through
 /// <see cref="Environment.Is64BitProcess"/>.
 /// NOTE(review): the 64-bit encoding reads its arguments from rcx, rdx and r8 and tracks the block index
 /// in a sign-extended 32-bit register; confirm the calling convention and that callers never pass lengths
 /// beyond the <see cref="int"/> range.
 /// NOTE(review): the 32-bit encoding returns with a plain <c>ret</c> (the caller cleans the stack);
 /// confirm that <c>CompareMemoryDelegate</c> is declared with the matching calling convention.
 /// </remarks>
 /// <returns>Created native method handle.</returns>
 private static NativeMethodHandle<CompareMemoryDelegate> CreateCompareMemoryAvx2MethodHandle()
 {
     byte[] machineCode;
     if (Environment.Is64BitProcess)
     {
         machineCode = new byte[]
         {
             0x4d, 0x89, 0xc1,                                    // mov    r9,r8
             0x49, 0x83, 0xe1, 0xe0,                              // and    r9,0xffffffffffffffe0       ; r9 = length rounded down to a multiple of 32
             0x74, 0x43,                                          // je     L4                          ; no full 32-byte block
             0x45, 0x31, 0xd2,                                    // xor    r10d,r10d                   ; block index = 0
             0x31, 0xc0,                                          // xor    eax,eax
             0xeb, 0x0c,                                          // jmp    L2
             // L1: advance to the next 32-byte block
             0x41, 0x83, 0xc2, 0x20,                              // add    r10d,0x20
             0x49, 0x63, 0xc2,                                    // movsxd rax,r10d
             0x4c, 0x39, 0xc8,                                    // cmp    rax,r9
             0x73, 0x2d,                                          // jae    L3                          ; all full blocks compared
             // L2: compare one 32-byte block
             0xc5, 0xfa, 0x6f, 0x14, 0x02,                        // vmovdqu xmm2,XMMWORD PTR [rdx+rax*1]
             0xc5, 0xfa, 0x6f, 0x1c, 0x01,                        // vmovdqu xmm3,XMMWORD PTR [rcx+rax*1]
             0xc4, 0xe3, 0x6d, 0x38, 0x44, 0x02, 0x10, 0x01,      // vinserti128 ymm0,ymm2,XMMWORD PTR [rdx+rax*1+0x10],0x1
             0xc4, 0xe3, 0x65, 0x38, 0x4c, 0x01, 0x10, 0x01,      // vinserti128 ymm1,ymm3,XMMWORD PTR [rcx+rax*1+0x10],0x1
             0xc5, 0xfd, 0x74, 0xc1,                              // vpcmpeqb ymm0,ymm0,ymm1
             0xc5, 0xfd, 0xd7, 0xc0,                              // vpmovmskb eax,ymm0
             0x83, 0xf8, 0xff,                                    // cmp    eax,0xffffffff              ; all 32 mask bits set = block equal
             0x74, 0xcd,                                          // je     L1
             0x31, 0xc0,                                          // xor    eax,eax                     ; mismatch: return 0
             0xc5, 0xf8, 0x77,                                    // vzeroupper
             0xc3,                                                // ret
             // L3: block loop finished
             0xc5, 0xf8, 0x77,                                    // vzeroupper
             // L4: start of the byte-wise tail
             0x49, 0x63, 0xc1,                                    // movsxd rax,r9d                     ; tail starts after the last full block
             0x49, 0x39, 0xc0,                                    // cmp    r8,rax
             0x77, 0x0f,                                          // ja     L7                          ; bytes remain
             // L5: everything matched
             0xb8, 0x01, 0x00, 0x00, 0x00,                        // mov    eax,0x1
             0xc3,                                                // ret
             // L6: next tail byte
             0x48, 0x83, 0xc0, 0x01,                              // add    rax,0x1
             0x49, 0x39, 0xc0,                                    // cmp    r8,rax
             0x76, 0xf1,                                          // jbe    L5
             // L7: compare one tail byte
             0x44, 0x0f, 0xb6, 0x1c, 0x02,                        // movzx  r11d,BYTE PTR [rdx+rax*1]
             0x44, 0x38, 0x1c, 0x01,                              // cmp    BYTE PTR [rcx+rax*1],r11b
             0x74, 0xec,                                          // je     L6
             0x31, 0xc0,                                          // xor    eax,eax                     ; mismatch: return 0
             0xc3                                                 // ret
         };
     }
     else
     {
         machineCode = new byte[]
         {
             0x55,                                                // push   ebp
             0x89, 0xe5,                                          // mov    ebp,esp
             0x57,                                                // push   edi
             0x56,                                                // push   esi
             0x8b, 0x75, 0x10,                                    // mov    esi,DWORD PTR [ebp+0x10]    ; esi = third argument (length)
             0x53,                                                // push   ebx
             0x8b, 0x4d, 0x08,                                    // mov    ecx,DWORD PTR [ebp+0x8]     ; ecx = first argument
             0x89, 0xf7,                                          // mov    edi,esi
             0x8b, 0x5d, 0x0c,                                    // mov    ebx,DWORD PTR [ebp+0xc]     ; ebx = second argument
             0x83, 0xe7, 0xe0,                                    // and    edi,0xffffffe0              ; edi = length rounded down to a multiple of 32
             0x74, 0x3f,                                          // je     L4                          ; no full 32-byte block
             0x31, 0xd2,                                          // xor    edx,edx                     ; block index = 0
             0xeb, 0x07,                                          // jmp    L2
             // L1: advance to the next 32-byte block
             0x83, 0xc2, 0x20,                                    // add    edx,0x20
             0x39, 0xd7,                                          // cmp    edi,edx
             0x76, 0x31,                                          // jbe    L3                          ; all full blocks compared
             // L2: compare one 32-byte block
             0xc5, 0xfa, 0x6f, 0x14, 0x13,                        // vmovdqu xmm2,XMMWORD PTR [ebx+edx*1]
             0xc5, 0xfa, 0x6f, 0x1c, 0x11,                        // vmovdqu xmm3,XMMWORD PTR [ecx+edx*1]
             0xc4, 0xe3, 0x6d, 0x38, 0x44, 0x13, 0x10, 0x01,      // vinserti128 ymm0,ymm2,XMMWORD PTR [ebx+edx*1+0x10],0x1
             0xc4, 0xe3, 0x65, 0x38, 0x4c, 0x11, 0x10, 0x01,      // vinserti128 ymm1,ymm3,XMMWORD PTR [ecx+edx*1+0x10],0x1
             0xc5, 0xfd, 0x74, 0xc1,                              // vpcmpeqb ymm0,ymm0,ymm1
             0xc5, 0xfd, 0xd7, 0xc0,                              // vpmovmskb eax,ymm0
             0x83, 0xf8, 0xff,                                    // cmp    eax,0xffffffff              ; all 32 mask bits set = block equal
             0x74, 0xd2,                                          // je     L1
             0x31, 0xc0,                                          // xor    eax,eax                     ; mismatch: return 0
             0xc5, 0xf8, 0x77,                                    // vzeroupper
             0x5b,                                                // pop    ebx
             0x5e,                                                // pop    esi
             0x5f,                                                // pop    edi
             0x5d,                                                // pop    ebp
             0xc3,                                                // ret
             // L3: block loop finished
             0xc5, 0xf8, 0x77,                                    // vzeroupper
             // L4: start of the byte-wise tail
             0x89, 0xf8,                                          // mov    eax,edi                     ; tail starts after the last full block
             0x39, 0xfe,                                          // cmp    esi,edi
             0x77, 0x11,                                          // ja     L7                          ; bytes remain
             // L5: everything matched
             0x5b,                                                // pop    ebx
             0xb8, 0x01, 0x00, 0x00, 0x00,                        // mov    eax,0x1
             0x5e,                                                // pop    esi
             0x5f,                                                // pop    edi
             0x5d,                                                // pop    ebp
             0xc3,                                                // ret
             // L6: next tail byte
             0x83, 0xc0, 0x01,                                    // add    eax,0x1
             0x39, 0xc6,                                          // cmp    esi,eax
             0x76, 0xef,                                          // jbe    L5
             // L7: compare one tail byte
             0x0f, 0xb6, 0x14, 0x03,                              // movzx  edx,BYTE PTR [ebx+eax*1]
             0x38, 0x14, 0x01,                                    // cmp    BYTE PTR [ecx+eax*1],dl
             0x74, 0xf0,                                          // je     L6
             0x5b,                                                // pop    ebx
             0x31, 0xc0,                                          // xor    eax,eax                     ; mismatch: return 0
             0x5e,                                                // pop    esi
             0x5f,                                                // pop    edi
             0x5d,                                                // pop    ebp
             0xc3                                                 // ret
         };
     }

     return NativeMethodHandle.Create<CompareMemoryDelegate>(machineCode);
 }
 /// <summary>
 /// Builds the machine code of the native "inner product" routine for two <see cref="float"/> arrays and
 /// wraps it into a <see cref="NativeMethodHandle{TDelegate}"/>.
 /// </summary>
 /// <remarks>
 /// Despite its name, the reference C++ source reproduced in the method body writes the element-wise
 /// product <c>dst[i] = src1[i] * src2[i]</c>; no summation is performed. Four elements are processed per
 /// iteration with SSE (<c>mulps</c>), the remaining <c>size % 4</c> elements are processed one by one
 /// (<c>mulss</c> in the 64-bit encoding, x87 <c>fmul</c> in the 32-bit encoding). The 64-bit or 32-bit
 /// encoding is selected through <see cref="Environment.Is64BitProcess"/>.
 /// NOTE(review): the 64-bit encoding reads its arguments from rcx, rdx, r8 and r9d; confirm that this
 /// matches the native calling convention on every x64 platform this handle is created on.
 /// </remarks>
 /// <returns>Native code method of the product calculation.</returns>
 private NativeMethodHandle<InnerProductDelegate> CreateInnerProductMethodHandle()
 {
     // Reference implementation the machine code below was generated from:
     //
     // void __stdcall
     // inner_product(
     //     float* dst,
     //     const float* src1,
     //     const float* src2,
     //     int size) noexcept
     // {
     //     constexpr auto stride = static_cast<int>(sizeof(__m128) / sizeof(float));
     //
     //     for (int i = 0, im = size - stride; i <= im; i += stride) {
     //         _mm_storeu_ps(
     //             &dst[i],
     //             _mm_mul_ps(
     //                 _mm_loadu_ps(&src1[i]),
     //                 _mm_loadu_ps(&src2[i])));
     //     }
     //
     //     const auto remSize = size % stride;
     //     if (remSize == 0) {
     //         return;
     //     }
     //
     //     const auto offset = size - remSize;
     //     for (int i = offset; i < size; i++) {
     //         dst[i] = src1[i] * src2[i];
     //     }
     // }
     byte[] machineCode;
     if (Environment.Is64BitProcess)
     {
         machineCode = new byte[]
         {
             0x41, 0x83, 0xf9, 0x03,                                          // cmp    r9d,0x3
             0x7e, 0x33,                                                      // jle    L2                        ; fewer than 4 elements: skip the SSE loop
             0x45, 0x8d, 0x51, 0xfc,                                          // lea    r10d,[r9-0x4]
             0x31, 0xc0,                                                      // xor    eax,eax                   ; byte offset = 0
             0x41, 0xc1, 0xea, 0x02,                                          // shr    r10d,0x2
             0x49, 0x83, 0xc2, 0x01,                                          // add    r10,0x1
             0x49, 0xc1, 0xe2, 0x04,                                          // shl    r10,0x4                   ; r10 = end byte offset of the SSE loop
             0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,                  // nop    DWORD PTR [rax+rax*1+0x0] ; padding
             // L1: multiply 4 floats per iteration
             0x41, 0x0f, 0x10, 0x04, 0x00,                                    // movups xmm0,XMMWORD PTR [r8+rax*1]
             0x0f, 0x10, 0x0c, 0x02,                                          // movups xmm1,XMMWORD PTR [rdx+rax*1]
             0x0f, 0x59, 0xc1,                                                // mulps  xmm0,xmm1
             0x0f, 0x11, 0x04, 0x01,                                          // movups XMMWORD PTR [rcx+rax*1],xmm0
             0x48, 0x83, 0xc0, 0x10,                                          // add    rax,0x10
             0x49, 0x39, 0xc2,                                                // cmp    r10,rax
             0x75, 0xe7,                                                      // jne    L1
             // L2: compute remSize = size % 4 (signed remainder)
             0x45, 0x89, 0xca,                                                // mov    r10d,r9d
             0x41, 0xc1, 0xfa, 0x1f,                                          // sar    r10d,0x1f
             0x41, 0xc1, 0xea, 0x1e,                                          // shr    r10d,0x1e
             0x43, 0x8d, 0x04, 0x11,                                          // lea    eax,[r9+r10*1]
             0x83, 0xe0, 0x03,                                                // and    eax,0x3
             0x44, 0x29, 0xd0,                                                // sub    eax,r10d
             0x74, 0x49,                                                      // je     L4                        ; remSize == 0: done
             0x45, 0x89, 0xca,                                                // mov    r10d,r9d
             0x41, 0x29, 0xc2,                                                // sub    r10d,eax                  ; r10d = offset = size - remSize
             0x45, 0x39, 0xd1,                                                // cmp    r9d,r10d
             0x7e, 0x3e,                                                      // jle    L4                        ; size <= offset: nothing to do
             0x41, 0x83, 0xe9, 0x01,                                          // sub    r9d,0x1
             0x4d, 0x63, 0xda,                                                // movsxd r11,r10d
             0x45, 0x29, 0xd1,                                                // sub    r9d,r10d
             0x4a, 0x8d, 0x04, 0x9d, 0x00, 0x00, 0x00, 0x00,                  // lea    rax,[r11*4+0x0]           ; rax = start byte offset of the tail
             0x4f, 0x8d, 0x4c, 0x0b, 0x01,                                    // lea    r9,[r11+r9*1+0x1]
             0x49, 0xc1, 0xe1, 0x02,                                          // shl    r9,0x2                    ; r9 = end byte offset of the tail
             0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,      // nop    WORD PTR cs:[rax+rax*1+0x0] ; padding
             // L3: multiply one float per iteration
             0xf3, 0x0f, 0x10, 0x04, 0x02,                                    // movss  xmm0,DWORD PTR [rdx+rax*1]
             0xf3, 0x41, 0x0f, 0x59, 0x04, 0x00,                              // mulss  xmm0,DWORD PTR [r8+rax*1]
             0xf3, 0x0f, 0x11, 0x04, 0x01,                                    // movss  DWORD PTR [rcx+rax*1],xmm0
             0x48, 0x83, 0xc0, 0x04,                                          // add    rax,0x4
             0x4c, 0x39, 0xc8,                                                // cmp    rax,r9
             0x75, 0xe7,                                                      // jne    L3
             // L4:
             0xc3                                                             // ret
         };
     }
     else
     {
         machineCode = new byte[]
         {
             0x55,                                          // push   ebp
             0x89, 0xe5,                                    // mov    ebp,esp
             0x57,                                          // push   edi
             0x56,                                          // push   esi
             0x8b, 0x75, 0x14,                              // mov    esi,DWORD PTR [ebp+0x14]    ; esi = size
             0x53,                                          // push   ebx
             0x8b, 0x55, 0x08,                              // mov    edx,DWORD PTR [ebp+0x8]     ; edx = dst
             0x8b, 0x5d, 0x0c,                              // mov    ebx,DWORD PTR [ebp+0xc]     ; ebx = src1
             0x83, 0xe4, 0xf0,                              // and    esp,0xfffffff0
             0x8b, 0x4d, 0x10,                              // mov    ecx,DWORD PTR [ebp+0x10]    ; ecx = src2
             0x83, 0xfe, 0x03,                              // cmp    esi,0x3
             0x7e, 0x21,                                    // jle    L2                          ; fewer than 4 elements: skip the SSE loop
             0x8d, 0x7e, 0xfc,                              // lea    edi,[esi-0x4]
             0x31, 0xc0,                                    // xor    eax,eax                     ; element index = 0
             0x83, 0xe7, 0xfc,                              // and    edi,0xfffffffc
             0x83, 0xc7, 0x04,                              // add    edi,0x4                     ; edi = end element index of the SSE loop
             // L1: multiply 4 floats per iteration
             0x0f, 0x10, 0x04, 0x81,                        // movups xmm0,XMMWORD PTR [ecx+eax*4]
             0x0f, 0x10, 0x0c, 0x83,                        // movups xmm1,XMMWORD PTR [ebx+eax*4]
             0x0f, 0x59, 0xc1,                              // mulps  xmm0,xmm1
             0x0f, 0x11, 0x04, 0x82,                        // movups XMMWORD PTR [edx+eax*4],xmm0
             0x83, 0xc0, 0x04,                              // add    eax,0x4
             0x39, 0xf8,                                    // cmp    eax,edi
             0x75, 0xea,                                    // jne    L1
             // L2: compute remSize = size % 4 (signed remainder)
             0x89, 0xf7,                                    // mov    edi,esi
             0xc1, 0xff, 0x1f,                              // sar    edi,0x1f
             0xc1, 0xef, 0x1e,                              // shr    edi,0x1e
             0x8d, 0x04, 0x3e,                              // lea    eax,[esi+edi*1]
             0x83, 0xe0, 0x03,                              // and    eax,0x3
             0x29, 0xf8,                                    // sub    eax,edi
             0x74, 0x2a,                                    // je     L4                          ; remSize == 0: done
             0x89, 0xf7,                                    // mov    edi,esi
             0x29, 0xc7,                                    // sub    edi,eax                     ; edi = offset = size - remSize
             0x39, 0xfe,                                    // cmp    esi,edi
             0x7e, 0x22,                                    // jle    L4                          ; size <= offset: nothing to do
             0xc1, 0xe7, 0x02,                              // shl    edi,0x2                     ; offset in bytes
             0x8d, 0x04, 0x3b,                              // lea    eax,[ebx+edi*1]             ; eax = &src1[offset]
             0x8d, 0x1c, 0xb3,                              // lea    ebx,[ebx+esi*4]             ; ebx = &src1[size] (loop end)
             0x01, 0xf9,                                    // add    ecx,edi                     ; ecx = &src2[offset]
             0x01, 0xfa,                                    // add    edx,edi                     ; edx = &dst[offset]
             // L3: multiply one float per iteration on the x87 stack
             0xd9, 0x00,                                    // fld    DWORD PTR [eax]
             0x83, 0xc0, 0x04,                              // add    eax,0x4
             0x83, 0xc1, 0x04,                              // add    ecx,0x4
             0x83, 0xc2, 0x04,                              // add    edx,0x4
             0xd8, 0x49, 0xfc,                              // fmul   DWORD PTR [ecx-0x4]
             0xd9, 0x5a, 0xfc,                              // fstp   DWORD PTR [edx-0x4]
             0x39, 0xd8,                                    // cmp    eax,ebx
             0x75, 0xeb,                                    // jne    L3
             // L4: restore the saved registers and return
             0x8d, 0x65, 0xf4,                              // lea    esp,[ebp-0xc]
             0x5b,                                          // pop    ebx
             0x5e,                                          // pop    esi
             0x5f,                                          // pop    edi
             0x5d,                                          // pop    ebp
             0xc2, 0x10, 0x00                               // ret    0x10                        ; callee pops the 16 bytes of arguments
         };
     }

     return NativeMethodHandle.Create<InnerProductDelegate>(machineCode);
 }