// Works out, for every triangle of the surface, whether it faces the light origin and stores the
// result in cullInfo.facing. The facing array is allocated with one entry more than the number of
// surface triangles; that trailing entry is the one dangling silhouette edges refer to.
static void R_CalcInteractionFacing(IRenderEntity ent, SrfTriangles tri, IRenderLight light, ref SrfCullInfo cullInfo)
{
    // nothing to do when the facing data has already been produced
    if (cullInfo.facing != null)
    {
        return;
    }

    var faceCount = tri.numIndexes / 3;

    // light origin converted by R_GlobalPointToLocal using the entity's model matrix
    R_GlobalPointToLocal(ent.modelMatrix, light.globalLightOrigin, out Vector3 lightOriginLocal);

    // face planes are derived on demand
    var planesReady = tri.facePlanes != null && tri.facePlanesCalculated;
    if (!planesReady)
    {
        R_DeriveFacePlanes(tri);
    }

    cullInfo.facing = (byte*)R_StaticAlloc((faceCount + 1) * sizeof(byte));

    // back face culling: one float per face, realigned through _alloca16
    float* sideOfPlane = stackalloc float[faceCount + floatX.ALLOC16];
    sideOfPlane = (float*)_alloca16(sideOfPlane);

    // exact geometric cull against each face plane
    fixed (Plane* planes = tri.facePlanes)
    {
        Simd.Dotcp(sideOfPlane, lightOriginLocal, planes, faceCount);
    }
    Simd.CmpGE(cullInfo.facing, sideOfPlane, 0f, faceCount);

    // extra slot for dangling edges to reference
    cullInfo.facing[faceCount] = 1;
}
/// <summary>
/// Decodes PCM data of <paramref name="sample"/> into <paramref name="dest"/>, up-sampling it through
/// <c>Simd.UpSamplePCMTo44kHz</c>.
/// </summary>
/// <param name="sample">Sample to read from.</param>
/// <param name="sampleOffset44k">Start offset, expressed at the 44kHz rate.</param>
/// <param name="sampleCount44k">Number of samples requested, expressed at the 44kHz rate.</param>
/// <param name="dest">Destination buffer.</param>
/// <returns>Number of samples produced (source samples shifted back up), or 0 on failure.</returns>
public unsafe int DecodePCM(SoundSample sample, int sampleOffset44k, int sampleCount44k, float* dest)
{
    lastSample = sample;
    lastFormat = WAVE_FORMAT_TAG.PCM;

    // convert the 44kHz based offset/count to the sample's own rate
    var shift = 22050 / sample.objectInfo.nSamplesPerSec;
    var sampleCount = sampleCount44k >> shift;
    var sampleOffset = sampleOffset44k >> shift;

    // this should never happen ( note: I've seen that happen with the main thread down in
    // idGameLocal::MapClear clearing entities - TTimo )
    if (sample.nonCacheData == null)
    {
        Debug.Assert(false);
        failed = true;
        return 0;
    }

    var fetched = sample.FetchFromCache(sampleOffset * sizeof(short), out var first, out var pos, out var size, false);
    if (!fetched)
    {
        failed = true;
        return 0;
    }

    // never read past what the fetched block still holds
    var bytesLeft = size - pos;
    var readSamples = bytesLeft < sampleCount * sizeof(short)
        ? bytesLeft / sizeof(short)
        : sampleCount;

    // duplicate samples for 44kHz output
    fixed (byte* source = &first.v[first.o + pos])
    {
        Simd.UpSamplePCMTo44kHz(dest, (short*)source, readSamples, sample.objectInfo.nSamplesPerSec, sample.objectInfo.nChannels);
    }

    return readSamples << shift;
}
/// <summary>
/// Writes a one-line device header followed by an aligned "key : value" listing collected from the
/// textual dumps of <c>Simd</c>, <c>Clock</c>, <c>Memory</c> and <c>Caps</c>.
/// </summary>
/// <param name="writer">Destination for the dump.</param>
void IDumpableAsText.DumpAsText(TextWriter writer)
{
    var pbag = new List<KeyValuePair<String, String>>();

    // Collects the "key: value" lines of a nested dump, dropping its first and last line.
    Action<String> fillPbag = s =>
    {
        foreach (var line in s.SplitLines().Skip(1).SkipLast(1))
        {
            var m = Regex.Match(line, "^(?<key>.*?):(?<value>.*)$");
            if (!m.Success)
            {
                // Match.Result throws on a failed match; a line without a colon carries no pair to list
                continue;
            }

            var key = m.Result("${key}").Trim();
            var value = m.Result("${value}").Trim();
            pbag.Add(new KeyValuePair<String, String>(key, value));
        }
    };

    writer.WriteLine("Device #{0} \"{1}\" (/pci:{2}/dev:{3})", Index, Name, PciBusId, PciDeviceId);
    fillPbag(Simd.ToString());
    fillPbag(Clock.ToString());
    fillPbag(Memory.ToString());
    fillPbag(Caps.ToString());

    if (pbag.Count == 0)
    {
        return;
    }

    // widest key, computed once instead of once per printed entry
    var maxKey = pbag.Max(kvp => kvp.Key.Length);
    foreach (var kvp in pbag)
    {
        writer.WriteLine(String.Format(" {0} : {1}", kvp.Key.PadRight(maxKey), kvp.Value));
    }
}
/// <summary>
/// Encrypts every complete block of <paramref name="input"/> into the same position of
/// <paramref name="output"/>. Blocks are processed independently of each other with the eleven round
/// keys stored in <c>enc</c>; a trailing partial block is left untouched.
/// </summary>
/// <param name="input">Plain text; only whole blocks of <c>BlockSize</c> bytes are consumed.</param>
/// <param name="output">Receives the cipher text; must hold at least all whole blocks of the input.</param>
/// <exception cref="System.ArgumentNullException">A buffer is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="output"/> is too small.</exception>
public void Encrypt(byte[] input, byte[] output)
{
    if (input == null)
    {
        throw new System.ArgumentNullException(nameof(input));
    }
    if (output == null)
    {
        throw new System.ArgumentNullException(nameof(output));
    }

    // The vector store below is only bounds-checked on its first byte, so a short output buffer
    // would be overrun; reject it up front.
    int wholeBlockBytes = input.Length - (input.Length % BlockSize);
    if (output.Length < wholeBlockBytes)
    {
        throw new System.ArgumentException("Output buffer is too small for the encrypted blocks.", nameof(output));
    }

    // NOTE(review): the 128-bit loads assume BlockSize is 16 bytes - confirm.
    var key0 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[0 * BlockSize]);
    var key1 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[1 * BlockSize]);
    var key2 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[2 * BlockSize]);
    var key3 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[3 * BlockSize]);
    var key4 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[4 * BlockSize]);
    var key5 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[5 * BlockSize]);
    var key6 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[6 * BlockSize]);
    var key7 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[7 * BlockSize]);
    var key8 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[8 * BlockSize]);
    var key9 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[9 * BlockSize]);
    var key10 = Unsafe.ReadUnaligned<Vector128<byte>>(ref enc[10 * BlockSize]);

    for (int position = 0; position < wholeBlockBytes; position += BlockSize)
    {
        var block = Unsafe.ReadUnaligned<Vector128<byte>>(ref input[position]);

        // nine rounds of Encrypt followed by MixColumns
        block = Aes.Encrypt(block, key0);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key1);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key2);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key3);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key4);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key5);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key6);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key7);
        block = Aes.MixColumns(block);
        block = Aes.Encrypt(block, key8);
        block = Aes.MixColumns(block);

        // final round: no MixColumns, the last round key is xor-ed in
        block = Aes.Encrypt(block, key9);
        block = Simd.Xor(block, key10);

        Unsafe.WriteUnaligned(ref output[position], block);
    }
}
// Special transform to make the mesh seem fat or skinny. May be used for zombie deaths.
// The mesh's scaledWeights are multiplied by 'scale' into an aligned stack buffer and the verts are
// then transformed with that scaled copy.
void TransformScaledVerts(DrawVert* verts, JointMat* joints, float scale)
{
    var scaled = stackalloc Vector4[numWeights + Vector4.ALLOC16];
    scaled = _alloca16(scaled);

    // The source of the multiply has to be the mesh's own weights. Previously a local named
    // 'scaledWeights' hid the field, so the scratch buffer was scaled against itself and the real
    // weights never entered the result.
    fixed (Vector4* meshWeights = this.scaledWeights)
    {
        Simd.Mul(&scaled[0].x, scale, &meshWeights[0].x, numWeights * 4);
    }

    fixed (int* weightIndexI = weightIndex)
    {
        Simd.TransformVerts(verts, texCoords.Length, joints, scaled, weightIndexI, numWeights);
    }
}
/// <summary>
/// Checks that <c>Sum()</c> of the vector built by <c>Simd&lt;T&gt;.CreateVector256(value)</c>
/// equals <paramref name="expected"/>.
/// </summary>
public void Vector256Sum<T>(T value, T expected) where T : unmanaged
{
    var actual = Simd<T>.CreateVector256(value).Sum();

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Checks that adding the vector created from <paramref name="value"/> to itself with
/// <c>Sse2&lt;T&gt;.Add</c> yields the vector created from <paramref name="expected"/>.
/// </summary>
public void Add<T>(T value, T expected) where T : unmanaged
{
    var operand = Simd<T>.CreateVector128(value);
    var wanted = Simd<T>.CreateVector128(expected);

    var actual = Sse2<T>.Add(operand, operand);

    Assert.AreEqual(wanted, actual);
}
/// <summary>
/// Checks that subtracting the vector created from <paramref name="value"/> from itself with
/// <c>Avx2&lt;T&gt;.Subtract</c> yields the vector created from <paramref name="expected"/>.
/// </summary>
public void Subtract<T>(T value, T expected) where T : unmanaged
{
    var operand = Simd<T>.CreateVector256(value);
    var wanted = Simd<T>.CreateVector256(expected);

    var actual = Avx2<T>.Subtract(operand, operand);

    Assert.AreEqual(wanted, actual);
}
/// <summary>
/// Computes the sum of every row of the matrix.
/// </summary>
/// <param name="matrix">The matrix.</param>
/// <typeparam name="T">Unmanaged element type.</typeparam>
/// <returns>An array with one entry per row, holding <c>Simd.Sum</c> of that row.</returns>
/// <exception cref="NullReferenceException"><paramref name="matrix"/> is null.</exception>
public static T[] SumByRows<T>(this Matrix<T> matrix) where T : unmanaged
{
    var rowCount = matrix.Rows;
    var sums = new T[rowCount];

    var row = 0;
    while (row < rowCount)
    {
        sums[row] = Simd.Sum(matrix[row]);
        row++;
    }

    return sums;
}
/// <summary>
/// Checks that <c>Sse2&lt;T&gt;.Add</c> of the vectors created from <paramref name="a"/> and
/// <paramref name="b"/> equals the vector created from <paramref name="expected"/>.
/// </summary>
public void Add<T>(T a, T b, T expected) where T : unmanaged
{
    var left = Simd<T>.CreateVector128(a);
    var right = Simd<T>.CreateVector128(b);
    var wanted = Simd<T>.CreateVector128(expected);

    var actual = Sse2<T>.Add(left, right);

    Assert.AreEqual(wanted, actual);
}
/// <summary>
/// Checks that <c>Avx2&lt;T&gt;.Subtract</c> of the vectors created from <paramref name="a"/> and
/// <paramref name="b"/> equals the vector created from <paramref name="expected"/>.
/// </summary>
public void Subtract<T>(T a, T b, T expected) where T : unmanaged
{
    var left = Simd<T>.CreateVector256(a);
    var right = Simd<T>.CreateVector256(b);
    var wanted = Simd<T>.CreateVector256(expected);

    var actual = Avx2<T>.Subtract(left, right);

    Assert.AreEqual(wanted, actual);
}
// We want to cull a little on the sloppy side, because the pre-clipping of geometry to the lights in dmap
// will give many cases that are right at the border. We throw things out on the border, because if any one
// vertex is clearly inside, the entire triangle will be accepted.
//
// Fills cullInfo.localClipPlanes with the six negated light frustum planes passed through R_GlobalPlaneToLocal,
// and cullInfo.cullBits with either IInteraction.LIGHT_CULL_ALL_FRONT (surface bounds in front of all six
// planes) or a freshly allocated per-vertex byte array updated by Simd.CmpLTb for every remaining plane.
static void R_CalcInteractionCullBits(IRenderEntity ent, SrfTriangles tri, IRenderLight light, ref SrfCullInfo cullInfo)
{
    int i, frontBits;

    // already computed
    if (cullInfo.cullBits != null)
    {
        return;
    }

    frontBits = 0;

    // cull the triangle surface bounding box
    for (i = 0; i < 6; i++)
    {
        R_GlobalPlaneToLocal(ent.modelMatrix, -light.frustum[i], out cullInfo.localClipPlanes[i]);

        // get front bits for the whole surface
        if (tri.bounds.PlaneDistance(cullInfo.localClipPlanes[i]) >= IInteraction.LIGHT_CLIP_EPSILON)
        {
            frontBits |= 1 << i;
        }
    }

    // if the surface is completely inside the light frustum
    if (frontBits == ((1 << 6) - 1))
    {
        cullInfo.cullBits = IInteraction.LIGHT_CULL_ALL_FRONT;
        return;
    }

    cullInfo.cullBits = (byte*)R_StaticAlloc(tri.numVerts * sizeof(byte));
    Simd.Memset(cullInfo.cullBits, 0, tri.numVerts * sizeof(byte));

    var planeSide = stackalloc float[tri.numVerts + floatX.ALLOC16];
    planeSide = (float*)_alloca16(planeSide);

    for (i = 0; i < 6; i++)
    {
        // if completely infront of this clipping plane
        if ((frontBits & (1 << i)) != 0)
        {
            continue;
        }

        // The per-vertex test must run for every plane that was not skipped above; it used to sit
        // after the 'continue' inside the same block and could never execute.
        fixed (DrawVert* vertsD = tri.verts)
        {
            Simd.Dotpd(planeSide, cullInfo.localClipPlanes[i], vertsD, tri.numVerts);
        }
        Simd.CmpLTb(cullInfo.cullBits, (byte)i, planeSide, IInteraction.LIGHT_CLIP_EPSILON, tri.numVerts);
    }
}
/// <summary>
/// Transforms the mesh vertices with <paramref name="joints"/> into a temporary buffer and returns the
/// bounds reported by <c>Simd.MinMaxd</c> for them.
/// </summary>
/// <param name="joints">Joint matrices handed to <c>TransformVerts</c>.</param>
/// <returns>The computed bounds.</returns>
public Bounds CalcBounds(JointMat[] joints)
{
    var vertCount = texCoords.Length;
    var paddedCount = vertCount + DrawVert.ALLOC16;

    // scratch buffers below 600000 bytes live on the stack, bigger ones on the heap
    var scratch = vertCount * sizeof(DrawVert) < 600000
        ? stackalloc DrawVert[paddedCount]
        : new DrawVert[paddedCount];
    scratch = _alloca16T(scratch);

    Bounds result = new();
    fixed (DrawVert* scratchPtr = scratch)
    {
        fixed (JointMat* jointsPtr = joints)
        {
            TransformVerts(scratchPtr, jointsPtr);
            Simd.MinMaxd(out result[0], out result[1], scratchPtr, texCoords.Length);
        }
    }

    return result;
}
// Reads a section of data from a wave file into pBuffer, reading not more than dwSizeToRead bytes.
// The number of bytes read is returned and also reported through pdwSizeRead (when not null).
// Subsequent calls continue where the last one left off unless Reset() is called.
// Returns -1 when there is nothing to read from.
public unsafe int Read(byte* pBuffer, int dwSizeToRead, Action<int> pdwSizeRead)
{
    // ogg streams have their own reader
    if (ogg != null)
    {
        return ReadOGG(pBuffer, dwSizeToRead, pdwSizeRead);
    }

    if (mbIsReadingFromMemory)
    {
        // NOTE(review): mpbDataCur is used as an index here, so this rejects reads at offset 0 -
        // confirm the cursor never legitimately sits at 0.
        if (mpbDataCur == 0)
        {
            return -1;
        }

        // clamp the request to what is left in the in-memory data
        if (mpbDataCur + dwSizeToRead > mulDataSize)
        {
            dwSizeToRead = mulDataSize - mpbDataCur;
        }

        // The copy has to happen for every memory read, not only for clamped ones. It is skipped when
        // nothing is left so the cursor is never used to index one past the data.
        if (dwSizeToRead > 0)
        {
            fixed (void* mpbDataCur_ = &mpbData[mpbDataCur])
            {
                Simd.Memcpy(pBuffer, mpbDataCur_, dwSizeToRead);
            }
        }
        mpbDataCur += dwSizeToRead;

        pdwSizeRead?.Invoke(dwSizeToRead);
        return dwSizeToRead;
    }

    // file backed read
    if (mhmmio == null || pBuffer == null)
    {
        return -1;
    }

    dwSizeToRead = mhmmio.Read(pBuffer, dwSizeToRead);

    // this is hit by ogg code, which does it's own byte swapping internally
    if (!isOgg)
    {
        LittleRevBytes(pBuffer, 2, dwSizeToRead / 2);
    }

    pdwSizeRead?.Invoke(dwSizeToRead);
    return dwSizeToRead;
}
/// <summary>Returns <c>Simd.Min</c> of <c>MatrixAvxX64</c>.</summary>
public double MinDoubleAvx() => Simd.Min(MatrixAvxX64);
/// <summary>Returns <c>Simd.Min</c> of <c>MatrixAvxX32</c>.</summary>
public float MinFloatAvx() => Simd.Min(MatrixAvxX32);
/// <summary>
/// Decodes OGG data of <paramref name="sample"/> into <paramref name="dest"/>, opening the stream on
/// first use, seeking when the requested offset differs from the last one, and up-sampling each
/// decoded chunk through <c>Simd.UpSampleOGGTo44kHz</c>.
/// </summary>
/// <param name="sample">Sample to decode.</param>
/// <param name="sampleOffset44k">Start offset, expressed at the 44kHz rate.</param>
/// <param name="sampleCount44k">Number of samples requested, expressed at the 44kHz rate.</param>
/// <param name="dest">Destination buffer.</param>
/// <returns>Number of samples produced (decoded samples shifted back up), or 0 on failure.</returns>
public unsafe int DecodeOGG(SoundSample sample, int sampleOffset44k, int sampleCount44k, float* dest)
{
    // convert the 44kHz based offset/count to the sample's own rate
    var shift = 22050 / sample.objectInfo.nSamplesPerSec;
    var sampleOffset = sampleOffset44k >> shift;
    var sampleCount = sampleCount44k >> shift;

    // open OGG file if not yet opened
    if (lastSample == null)
    {
        // make sure there is enough space for another decoder
        if (ISampleDecoder.decoderMemoryAllocator.FreeBlockMemory < ISampleDecoder.MIN_OGGVORBIS_MEMORY)
        {
            return 0;
        }

        // this should never happen
        if (sample.nonCacheData == null)
        {
            Debug.Assert(false);
            failed = true;
            return 0;
        }

        file.SetData(sample.nonCacheData, sample.objectMemSize);
        if (OggVorbis.ov_openFile(file, ogg) < 0)
        {
            failed = true;
            return 0;
        }

        lastFormat = WAVE_FORMAT_TAG.OGG;
        lastSample = sample;
    }

    // seek to the right offset if necessary
    if (sampleOffset != lastSampleOffset)
    {
        if (ov_pcm_seek(ogg, sampleOffset / sample.objectInfo.nChannels) != 0)
        {
            failed = true;
            return 0;
        }
    }
    lastSampleOffset = sampleOffset;

    // decode OGG samples; the body always runs at least once
    var remaining = sampleCount;
    var decoded = 0;
    for (;;)
    {
        float** samples;
        var got = (int)ov_read_float(ogg, &samples, remaining / sample.objectInfo.nChannels, null);

        // a negative result aborts the whole call
        if (got < 0)
        {
            failed = true;
            return 0;
        }

        // nothing delivered: flag the failure but keep what was decoded so far
        if (got == 0)
        {
            failed = true;
            break;
        }

        got *= sample.objectInfo.nChannels;
        Simd.UpSampleOGGTo44kHz(dest + (decoded << shift), samples, got, sample.objectInfo.nSamplesPerSec, sample.objectInfo.nChannels);

        decoded += got;
        remaining -= got;
        if (remaining <= 0)
        {
            break;
        }
    }

    lastSampleOffset += decoded;
    return decoded << shift;
}
/// <summary>
/// Computes the sum of a single row of the matrix.
/// </summary>
/// <param name="matrix">The matrix.</param>
/// <param name="dimension">Index of the row to sum.</param>
/// <typeparam name="T">Unmanaged element type.</typeparam>
/// <returns><c>Simd.Sum</c> of the row at <paramref name="dimension"/>.</returns>
/// <exception cref="NullReferenceException"><paramref name="matrix"/> is null.</exception>
public static T SumByRow<T>(this Matrix<T> matrix, int dimension) where T : unmanaged
{
    var row = matrix[dimension];
    return Simd.Sum(row);
}
/// <summary>
/// Computes the sum of a vector of <see cref="short"/> values.
/// </summary>
/// <param name="vector">The vector whose backing array is summed.</param>
/// <returns><c>Simd.Sum</c> of <c>vector.Array</c>.</returns>
public static short Sum(this Vector<short> vector) => Simd.Sum(vector.Array);
// R_CreateVertexProgramTurboShadowVolume
//
// Builds the index list of a shadow volume for 'tri' as lit by 'light' and returns it in a newly allocated
// SrfTriangles, or returns null when no triangle ends up counted as shadowing.
//   1. R_CalcInteractionFacing is called to fill cullInfo.facing; when r_useShadowProjectedCull.Bool is set,
//      R_CalcInteractionCullBits is called as well.
//   2. Shadowing faces are counted. If cullBits is LIGHT_CULL_ALL_FRONT or the projected cull is disabled, the
//      count is numFaces minus the sum of facing[]. Otherwise every triangle with facing == 0 whose three
//      vertex cull bits share a set bit is marked as facing (1), and the remaining facing == 0 triangles are counted.
//   3. newTri.numVerts is set to twice tri.numVerts. Six indexes are written for every silhouette edge whose
//      two faces (sil.p1 / sil.p2) differ in facing, then six more for every triangle with facing == 0.
//   4. numIndexes, numShadowIndexesNoFrontCaps, numShadowIndexesNoCaps and shadowCapPlaneBits
//      (SHADOW_CAP_INFINITE) are filled in and newTri.bounds is cleared.
// NOTE(review): cullInfo is received by value and passed on without 'ref'; if the two helpers take it by
// reference, the facing / cull bits they compute never reach this method - confirm against their signatures.
// NOTE(review): 'facing' is declared byte[] and the index buffers as GlIndex while the code indexes and
// advances them like pointers; this looks like an unfinished port - confirm it builds.
// NOTE(review): the #if / #else / #endif directives sit in the middle of a line in this copy; they need
// lines of their own before the compiler will honour them.
// are dangling edges that are outside the light frustum still making planes? public static SrfTriangles R_CreateVertexProgramTurboShadowVolume(RenderEntityLocal ent, SrfTriangles tri, RenderLightLocal light, SrfCullInfo cullInfo) { int i, j; SrfTriangles newTri; SilEdge sil; GlIndex[] indexes; byte[] facing; R_CalcInteractionFacing(ent, tri, light, cullInfo); if (r_useShadowProjectedCull.Bool) { R_CalcInteractionCullBits(ent, tri, light, cullInfo); } var numFaces = tri.numIndexes / 3; var numShadowingFaces = 0; facing = cullInfo.facing; // if all the triangles are inside the light frustum if (cullInfo.cullBits == LIGHT_CULL_ALL_FRONT || !r_useShadowProjectedCull.Bool) { // count the number of shadowing faces for (i = 0; i < numFaces; i++) { numShadowingFaces += facing[i]; } numShadowingFaces = numFaces - numShadowingFaces; } else { // make all triangles that are outside the light frustum "facing", so they won't cast shadows indexes = tri.indexes; byte *modifyFacing = cullInfo.facing; byte *cullBits = cullInfo.cullBits; for (j = i = 0; i < tri.numIndexes; i += 3, j++) { if (modifyFacing[j] == 0) { var i1 = indexes[i + 0]; var i2 = indexes[i + 1]; var i3 = indexes[i + 2]; if ((cullBits[i1] & cullBits[i2] & cullBits[i3]) != 0) { modifyFacing[j] = 1; } else { numShadowingFaces++; } } } } // no faces are inside the light frustum and still facing the right way if (numShadowingFaces == 0) { return(null); } // shadowVerts will be NULL on these surfaces, so the shadowVerts will be taken from the ambient surface newTri = R_AllocStaticTriSurf(); newTri.numVerts = tri.numVerts * 2; // alloc the max possible size #if USE_TRI_DATA_ALLOCATOR R_AllocStaticTriSurfIndexes(newTri, (numShadowingFaces + tri.numSilEdges) * 6); GlIndex tempIndexes = newTri.indexes; GlIndex shadowIndexes = newTri.indexes; #else GlIndex tempIndexes = new GlIndex[tri.numSilEdges * 6]; GlIndex shadowIndexes = tempIndexes; #endif // create new triangles along sil planes for (sil = tri.silEdges, i = 
tri.numSilEdges; i > 0; i--, sil++) { int f1 = facing[sil.p1], f2 = facing[sil.p2]; if ((f1 ^ f2) == 0) { continue; } int v1 = sil.v1 << 1, v2 = sil.v2 << 1; // set the two triangle winding orders based on facing without using a poorly-predictable branch shadowIndexes[0] = v1; shadowIndexes[1] = v2 ^ f1; shadowIndexes[2] = v2 ^ f2; shadowIndexes[3] = v1 ^ f2; shadowIndexes[4] = v1 ^ f1; shadowIndexes[5] = v2 ^ 1; shadowIndexes += 6; } int numShadowIndexes = shadowIndexes - tempIndexes; // we aren't bothering to separate front and back caps on these newTri.numIndexes = newTri.numShadowIndexesNoFrontCaps = numShadowIndexes + numShadowingFaces * 6; newTri.numShadowIndexesNoCaps = numShadowIndexes; newTri.shadowCapPlaneBits = SHADOW_CAP_INFINITE; #if USE_TRI_DATA_ALLOCATOR // decrease the size of the memory block to only store the used indexes R_ResizeStaticTriSurfIndexes(newTri, newTri.numIndexes); #else // allocate memory for the indexes R_AllocStaticTriSurfIndexes(newTri, newTri.numIndexes); // copy the indexes we created for the sil planes Simd.Memcpy(newTri.indexes, tempIndexes, numShadowIndexes * sizeof(tempIndexes[0])); #endif // these have no effect, because they extend to infinity newTri.bounds.Clear(); // put some faces on the model and some on the distant projection indexes = tri.indexes; shadowIndexes = newTri.indexes + numShadowIndexes; for (i = 0, j = 0; i < tri.numIndexes; i += 3, j++) { if (facing[j] != 0) { continue; } var i0 = indexes[i + 0] << 1; shadowIndexes[2] = i0; shadowIndexes[3] = i0 ^ 1; var i1 = indexes[i + 1] << 1; shadowIndexes[1] = i1; shadowIndexes[4] = i1 ^ 1; var i2 = indexes[i + 2] << 1; shadowIndexes[0] = i2; shadowIndexes[5] = i2 ^ 1; shadowIndexes += 6; } return(newTri); }
/// <summary>
/// Runs the hardware accelerated compression function over every complete 64-byte block of
/// <paramref name="data"/>, updating <paramref name="state"/> in place. Bytes after the last
/// complete block are ignored.
/// </summary>
/// <param name="state">Hash state; elements 0-3 and 4-7 are loaded as two 128-bit vectors and written back.</param>
/// <param name="data">Input bytes.</param>
private static void Block(uint[] state, ReadOnlySpan<byte> data)
{
    var msg = new byte[64];

    // Load state
    var abcd = Unsafe.ReadUnaligned<Vector128<uint>>(ref Unsafe.As<uint, byte>(ref state[0]));
    var efgh = Unsafe.ReadUnaligned<Vector128<uint>>(ref Unsafe.As<uint, byte>(ref state[4]));

    // Load the round constants up front, four words per vector (k[0x00] .. k[0x3c])
    var roundKeys = new Vector128<uint>[16];
    for (var group = 0; group < roundKeys.Length; group++)
    {
        roundKeys[group] = Unsafe.ReadUnaligned<Vector128<uint>>(ref Unsafe.As<uint, byte>(ref k[group * 4]));
    }

    while (data.Length >= 64)
    {
        // Save state
        var savedAbcd = abcd;
        var savedEfgh = efgh;

        // Reverse for little endian
        var source = MemoryMarshal.Cast<byte, uint>(data);
        var target = MemoryMarshal.Cast<byte, uint>(msg);
        for (var word = 0; word < 16; ++word)
        {
            target[word] = BinaryPrimitives.ReverseEndianness(source[word]);
        }

        // Load message
        var m0 = Unsafe.ReadUnaligned<Vector128<uint>>(ref msg[0]);
        var m1 = Unsafe.ReadUnaligned<Vector128<uint>>(ref msg[16]);
        var m2 = Unsafe.ReadUnaligned<Vector128<uint>>(ref msg[32]);
        var m3 = Unsafe.ReadUnaligned<Vector128<uint>>(ref msg[48]);

        // Sixteen groups of four rounds. m0 is always the vector consumed by the current group;
        // the four message vectors rotate by one position after every group.
        for (var group = 0; group < 16; group++)
        {
            var wk = Simd.Add(m0, roundKeys[group]);

            // the schedule is only extended during rounds 0-47
            if (group < 12)
            {
                m0 = Sha256.SchedulePart1(m0, m1);
                m0 = Sha256.SchedulePart2(m0, m2, m3);
            }

            var previousAbcd = abcd;
            abcd = Sha256.HashLower(abcd, efgh, wk);
            efgh = Sha256.HashUpper(efgh, previousAbcd, wk);

            var head = m0;
            m0 = m1;
            m1 = m2;
            m2 = m3;
            m3 = head;
        }

        // Combine state
        abcd = Simd.Add(abcd, savedAbcd);
        efgh = Simd.Add(efgh, savedEfgh);

        data = data.Slice(64);
    }

    Unsafe.WriteUnaligned(ref Unsafe.As<uint, byte>(ref state[0]), abcd);
    Unsafe.WriteUnaligned(ref Unsafe.As<uint, byte>(ref state[4]), efgh);
}
/// <summary>
/// Computes the sum of all elements of the matrix.
/// </summary>
/// <param name="matrix">The matrix.</param>
/// <typeparam name="T">Unmanaged element type.</typeparam>
/// <returns><c>Simd.Sum</c> of the matrix's backing storage.</returns>
/// <exception cref="NullReferenceException"><paramref name="matrix"/> is null.</exception>
public static T Sum<T>(this Matrix<T> matrix) where T : unmanaged
{
    var storage = matrix._Matrix;
    return Simd.Sum(storage);
}
/// <summary>
/// Computes the sum of a vector of any unmanaged element type.
/// </summary>
/// <param name="vector">The vector whose backing array and length are handed to <c>Simd.Sum</c>.</param>
/// <typeparam name="T">Unmanaged element type.</typeparam>
/// <returns><c>Simd.Sum</c> of <c>vector.Array</c> over <c>vector.Length</c>.</returns>
public static T Sum<T>(this Vector<T> vector) where T : unmanaged
    => Simd.Sum(vector.Array, vector.Length);
/// <summary>
/// Computes the sum of a vector of <see cref="double"/> values.
/// </summary>
/// <param name="vector">The vector whose backing array is summed.</param>
/// <returns><c>Simd.Sum</c> of <c>vector.Array</c>.</returns>
public static double Sum(this Vector<double> vector) => Simd.Sum(vector.Array);
/// <summary>
/// Computes the sum of a vector of <see cref="float"/> values.
/// </summary>
/// <param name="vector">The vector whose backing array is summed.</param>
/// <returns><c>Simd.Sum</c> of <c>vector.Array</c>.</returns>
public static float Sum(this Vector<float> vector) => Simd.Sum(vector.Array);
/// <summary>Returns <c>Simd.Min</c> of <c>MatrixAvxIntX32</c>, converted to <see cref="double"/>.</summary>
public double MinIntAvx() => Simd.Min(MatrixAvxIntX32);
/// <summary>Returns the result of <c>Simd.Equals</c> for <c>_matrix1</c> and <c>_matrix2</c>.</summary>
public bool EqualsAvx() => Simd.Equals(_matrix1, _matrix2);
// If we resort the vertexes so all silverts come first, we can save some work here. public unsafe static LocalTrace R_LocalTrace(Vector3 start, Vector3 end, float radius, SrfTriangles tri) { int i, j; Plane[] planes = new Plane[4]; LocalTrace hit = new(); int c_testEdges, c_testPlanes, c_intersect; Vector3 startDir; byte totalOr; float radiusSqr; #if TEST_TRACE Timer trace_timer = new(); trace_timer.Start(); #endif hit.fraction = 1f; // create two planes orthogonal to each other that intersect along the trace startDir = end - start; startDir.Normalize(); startDir.NormalVectors(out planes[0].Normal, out planes[1].Normal); planes[0].d = -start * planes[0].Normal; planes[1].d = -start * planes[1].Normal; // create front and end planes so the trace is on the positive sides of both planes[2] = startDir; planes[2].d = -start * planes[2].Normal; planes[3] = -startDir; planes[3].d = -end * planes[3].Normal; // catagorize each point against the four planes var cullBits = stackalloc byte[tri.numVerts]; Simd.TracePointCull(cullBits, totalOr, radius, planes, tri.verts, tri.numVerts); // if we don't have points on both sides of both the ray planes, no intersection if (((totalOr ^ (totalOr >> 4)) & 3) != 0) /*common.Printf("nothing crossed the trace planes\n");*/ return { (hit); } // if we don't have any points between front and end, no intersection if (((totalOr ^ (totalOr >> 1)) & 4) != 0) /*common.Printf("trace didn't reach any triangles\n");*/ return { (hit); } // scan for triangles that cross both planes c_testPlanes = c_testEdges = c_intersect = 0; radiusSqr = Square(radius); startDir = end - start; if (tri.facePlanes == null || !tri.facePlanesCalculated) { R_DeriveFacePlanes(tri); } for (i = 0, j = 0; i < tri.numIndexes; i += 3, j++) { float d1, d2, f, d, edgeLengthSqr; byte triOr; Vector3 cross, edge; Vector3[] dir = new Vector3[3]; // get sidedness info for the triangle triOr = cullBits[tri.indexes[i + 0]]; triOr |= cullBits[tri.indexes[i + 1]]; triOr |= 
cullBits[tri.indexes[i + 2]]; // if we don't have points on both sides of both the ray planes, no intersection if (((triOr ^ (triOr >> 4)) & 3) != 0) { continue; } // if we don't have any points between front and end, no intersection if (((triOr ^ (triOr >> 1)) & 4) != 0) { continue; } c_testPlanes++; ref Plane plane = ref tri.facePlanes[j]; d1 = plane.Distance(start); d2 = plane.Distance(end); if (d1 <= d2) { continue; // comning at it from behind or parallel } if (d1 < 0f) { continue; // starts past it } if (d2 > 0f) { continue; // finishes in front of it } f = d1 / (d1 - d2); if (f < 0f) { continue; // shouldn't happen } if (f >= hit.fraction) { continue; // have already hit something closer } c_testEdges++; // find the exact point of impact with the plane var point = start + f * startDir; // see if the point is within the three edges if radius > 0 the triangle is expanded with a circle in the triangle plane dir[0] = tri.verts[tri.indexes[i + 0]].xyz - point; dir[1] = tri.verts[tri.indexes[i + 1]].xyz - point; cross = dir[0].Cross(dir[1]); d = plane.Normal * cross; if (d > 0f) { if (radiusSqr <= 0f) { continue; } edge = tri.verts[tri.indexes[i + 0]].xyz - tri.verts[tri.indexes[i + 1]].xyz; edgeLengthSqr = edge.LengthSqr; if (cross.LengthSqr > edgeLengthSqr * radiusSqr) { continue; } d = edge * dir[0]; if (d < 0f) { edge = tri.verts[tri.indexes[i + 0]].xyz - tri.verts[tri.indexes[i + 2]].xyz; d = edge * dir[0]; if (d < 0f && dir[0].LengthSqr > radiusSqr) { continue; } } else if (d > edgeLengthSqr) { edge = tri.verts[tri.indexes[i + 1]].xyz - tri.verts[tri.indexes[i + 2]].xyz; d = edge * dir[1]; if (d < 0f && dir[1].LengthSqr > radiusSqr) { continue; } } } dir[2] = tri.verts[tri.indexes[i + 2]].xyz - point; cross = dir[1].Cross(dir[2]); d = plane.Normal * cross; if (d > 0f) { if (radiusSqr <= 0f) { continue; } edge = tri.verts[tri.indexes[i + 1]].xyz - tri.verts[tri.indexes[i + 2]].xyz; edgeLengthSqr = edge.LengthSqr; if (cross.LengthSqr > edgeLengthSqr * 
radiusSqr) { continue; } d = edge * dir[1]; if (d < 0f) { edge = tri.verts[tri.indexes[i + 1]].xyz - tri.verts[tri.indexes[i + 0]].xyz; d = edge * dir[1]; if (d < 0f && dir[1].LengthSqr > radiusSqr) { continue; } } else if (d > edgeLengthSqr) { edge = tri.verts[tri.indexes[i + 2]].xyz - tri.verts[tri.indexes[i + 0]].xyz; d = edge * dir[2]; if (d < 0f && dir[2].LengthSqr > radiusSqr) { continue; } } } cross = dir[2].Cross(dir[0]); d = plane.Normal * cross; if (d > 0f) { if (radiusSqr <= 0f) { continue; } edge = tri.verts[tri.indexes[i + 2]].xyz - tri.verts[tri.indexes[i + 0]].xyz; edgeLengthSqr = edge.LengthSqr; if (cross.LengthSqr > edgeLengthSqr * radiusSqr) { continue; } d = edge * dir[2]; if (d < 0f) { edge = tri.verts[tri.indexes[i + 2]].xyz - tri.verts[tri.indexes[i + 1]].xyz; d = edge * dir[2]; if (d < 0f && dir[2].LengthSqr > radiusSqr) { continue; } } else if (d > edgeLengthSqr) { edge = tri.verts[tri.indexes[i + 0]].xyz - tri.verts[tri.indexes[i + 1]].xyz; d = edge * dir[0]; if (d < 0f && dir[0].LengthSqr > radiusSqr) { continue; } } } // we hit it c_intersect++; hit.fraction = f; hit.normal = plane.Normal; hit.point = point; hit.indexes[0] = tri.indexes[i]; hit.indexes[1] = tri.indexes[i + 1]; hit.indexes[2] = tri.indexes[i + 2]; }
// Transforms the mesh vertices into 'verts' by pinning the mesh's weight index and scaled weight arrays
// and handing them, together with 'joints', to Simd.TransformVerts.
void TransformVerts(DrawVert* verts, JointMat* joints)
{
    fixed (int* indexPtr = weightIndex)
    {
        fixed (Vector4* weightPtr = scaledWeights)
        {
            Simd.TransformVerts(verts, texCoords.Length, joints, weightPtr, indexPtr, numWeights);
        }
    }
}
/// <summary>
/// Computes the sum of a vector of <see cref="int"/> values.
/// </summary>
/// <param name="vector">The vector whose backing array is summed.</param>
/// <returns><c>Simd.Sum</c> of <c>vector.Array</c>.</returns>
public static int Sum(this Vector<int> vector) => Simd.Sum(vector.Array);