/// <summary>
/// Calls the <c>Vector256</c> creation helpers (<c>Create</c>, <c>CreateScalar</c>,
/// <c>CreateScalarUnsafe</c>) for float, double, ushort and long, and hands the formatted
/// result together with the expected text to <c>AssertEqual</c>.
/// </summary>
/// <returns>The current value of <c>retCode</c> (presumably updated by <c>AssertEqual</c> on a mismatch — confirm).</returns>
public static int Main()
{
    // float: 8 elements per vector
    AssertEqual(Vector256.Create(1f).ToString(), "<1, 1, 1, 1, 1, 1, 1, 1>");
    AssertEqual(Vector256.CreateScalar(1f).ToString(), "<1, 0, 0, 0, 0, 0, 0, 0>");
    AssertEqual(Vector256.CreateScalarUnsafe(1f).ToScalar().ToString(), "1");
    AssertEqual(Vector256.Create(0.0f, 1, 2, 3, 4, 5, 6, 7).ToString(), "<0, 1, 2, 3, 4, 5, 6, 7>");

    // double: 4 elements per vector
    AssertEqual(Vector256.Create(1.0).ToString(), "<1, 1, 1, 1>");
    AssertEqual(Vector256.CreateScalar(1.0).ToString(), "<1, 0, 0, 0>");
    AssertEqual(Vector256.CreateScalarUnsafe(1.0).ToScalar().ToString(), "1");
    AssertEqual(Vector256.Create(0.0, 1, 2, 3).ToString(), "<0, 1, 2, 3>");

    // ushort: 16 elements per vector
    AssertEqual(
        Vector256.Create((ushort)1).ToString(),
        "<1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>");
    AssertEqual(
        Vector256.CreateScalar((ushort)1).ToString(),
        "<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>");
    AssertEqual(Vector256.CreateScalarUnsafe((ushort)1).ToScalar().ToString(), "1");
    AssertEqual(
        Vector256.Create((ushort)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).ToString(),
        "<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>");

    // long: 4 elements per vector
    AssertEqual(Vector256.Create(1L).ToString(), "<1, 1, 1, 1>");
    AssertEqual(Vector256.CreateScalar(1L).ToString(), "<1, 0, 0, 0>");
    AssertEqual(Vector256.CreateScalarUnsafe(1L).ToScalar().ToString(), "1");
    AssertEqual(Vector256.Create(0L, 1, 2, 3).ToString(), "<0, 1, 2, 3>");

    return retCode;
}
/// <summary>
/// Basic scenario: obtains a float from <c>TestLibrary.Generator</c>, wraps it with
/// <c>Vector256.CreateScalar</c> and passes both the vector and the float to <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

    float scalar = TestLibrary.Generator.GetSingle();
    Vector256<float> vector = Vector256.CreateScalar(scalar);

    ValidateResult(vector, scalar);
}
/// <summary>
/// Produces a vector that keeps only the first element of <paramref name="vector"/>;
/// all remaining elements are zero.
/// </summary>
/// <param name="vector">The vector whose first element is extracted.</param>
/// <returns>A vector of the form <c>&lt;vector[0], 0, 0, 0&gt;</c>.</returns>
public static Vector256<double> ToScalarVector256(Vector256<double> vector)
{
    double first = vector.ToScalar();
    return Vector256.CreateScalar(first);
}
/// <summary>
/// Extension method that wraps <paramref name="scalar"/> with <c>Vector256.CreateScalar</c>:
/// the scalar goes into the first element and the remaining elements are zero.
/// </summary>
/// <param name="scalar">The value to place in the first element.</param>
/// <returns>The resulting vector, returned as <c>Vector4D</c>.</returns>
public static Vector4D LoadScalar(this double scalar)
{
    return Vector256.CreateScalar(scalar);
}
// Wraps `value` with Vector256.CreateScalar (value in the first element, zeros elsewhere)
// and returns it as Vec.
static Vec VecInit(double value)
{
    return Vector256.CreateScalar(value);
}
/// <summary>
/// Fills a new <c>int[h, w]</c> table from <c>_data</c>, row by row. In the scalar tail each cell is
/// the source byte, plus the cell to its left, plus the cell above, minus the cell above-left.
/// Full groups of <c>Vector256&lt;int&gt;.Count</c> columns apply the same add/add/subtract pattern
/// on whole vectors, with <c>Aggregate</c> supplying the in-vector running sum
/// (NOTE(review): exact contract of <c>Aggregate</c> is not visible here — confirm).
/// </summary>
/// <returns>
/// The filled table. When the width or height is zero the empty table is returned immediately.
/// </returns>
public unsafe int[,] IntegrateUnsafeVectorBranched()
{
    int w = _data.Width();
    int h = _data.Height();
    int[,] res = new int[h, w];

    // Taking the address of element [0, 0] of an empty array throws, so bail out early
    // with the (empty) result instead of crashing.
    if (w == 0 || h == 0)
    {
        return res;
    }

    fixed (byte* pSource = &_data[0, 0])
    fixed (int* pTarget = &res[0, 0])
    {
        var pSrc = pSource;
        var pTrg = pTarget;

        for (var i = 0; i < h; i++)
        {
            var j = 0;
            var p = Vector256.CreateScalar(0);   // previous raw input vector of this row
            var pr = Vector256.CreateScalar(0);  // previous stored result vector of this row

            // Vector part: whole groups of Vector256<int>.Count columns.
            for (; j + Vector256<int>.Count <= w; j += Vector256<int>.Count)
            {
                var t = Avx2.ConvertToVector256Int32(pSrc); // widen the next source bytes to ints

                // This code block has to be added to handle the in-line
                // recursion: S[i] = a[i] + S[i-1]
                var s = Aggregate(p, t);
                p = t;
                t = Avx2.Add(t, s);

                if (j > 0)
                {
                    t = Avx2.Add(t, pr); // vector counterpart of "t += *(pTrg - 1)"
                }

                if (i > 0)
                {
                    // Add the group directly above ...
                    t = Avx2.Add(t, Avx.LoadVector256(pTrg - w));
                    if (j > 0)
                    {
                        // ... and remove the group above-left.
                        t = Avx2.Subtract(t, Avx.LoadVector256(pTrg - w - Vector256<int>.Count));
                    }
                }

                Avx.Store(pTrg, t);
                pr = t;
                pSrc += Vector256<int>.Count;
                pTrg += Vector256<int>.Count;
            }

            // Scalar tail: seed "left neighbour" from the last element of the last stored vector,
            // or 0 when no vector group was processed in this row.
            var pr2 = j == 0 ? 0 : pr.GetElement(Vector256<int>.Count - 1);
            for (; j < w; j++)
            {
                var t = (int)*pSrc;

                if (j > 0)
                {
                    t += pr2; // left neighbour
                }

                if (i > 0)
                {
                    t += *(pTrg - w); // cell above
                    if (j > 0)
                    {
                        t -= *(pTrg - w - 1); // cell above-left
                    }
                }

                *pTrg = t;
                pr2 = t;
                pSrc++;
                pTrg++;
            }
        }
    }

    return res;
}
/// <summary>
/// Absorbs the bytes of <paramref name="m"/> into the accumulator fields <c>_h0.._h4</c>.
/// Five vectors are built from 32-bit words read at byte offsets 0, 3, 6, 9 and 12 of
/// <paramref name="m"/> (words 0, 4, 8 and 12 of each view), shifted and masked, and the current
/// <c>_h</c> value is added into the first element. The vectors are then multiplied with the
/// <c>_ruwy*</c>/<c>_svxz*</c> fields, summed, and carried in 26-bit steps back into <c>_h0.._h4</c>.
/// NOTE(review): this looks like a four-block Poly1305-style limb update — confirm.
/// </summary>
/// <param name="m">Input bytes; the indexing below reads up to byte offset 12 + 13 * 4.</param>
private void Block4(ReadOnlySpan<byte> m)
{
    // Limb 0: words at byte offset 0, masked.
    var words0 = MemoryMarshal.Cast<byte, uint>(m);
    var hc0 = IntrinsicsUtils.Create4UInt(words0[0], words0[4], words0[8], words0[12]);
    hc0 = Avx2.And(hc0, And256);
    hc0 = Avx2.Add(hc0, Vector256.CreateScalar(_h0));

    // Limb 1: words at byte offset 3, shifted right by 2, masked.
    var words1 = MemoryMarshal.Cast<byte, uint>(m.Slice(3));
    var hc1 = IntrinsicsUtils.Create4UInt(words1[0], words1[4], words1[8], words1[12]);
    hc1 = Avx2.ShiftRightLogical(hc1, 2);
    hc1 = Avx2.And(hc1, And256);
    hc1 = Avx2.Add(hc1, Vector256.CreateScalar(_h1));

    // Limb 2: words at byte offset 6, shifted right by 4, masked.
    var words2 = MemoryMarshal.Cast<byte, uint>(m.Slice(6));
    var hc2 = IntrinsicsUtils.Create4UInt(words2[0], words2[4], words2[8], words2[12]);
    hc2 = Avx2.ShiftRightLogical(hc2, 4);
    hc2 = Avx2.And(hc2, And256);
    hc2 = Avx2.Add(hc2, Vector256.CreateScalar(_h2));

    // Limb 3: words at byte offset 9, shifted right by 6, masked.
    var words3 = MemoryMarshal.Cast<byte, uint>(m.Slice(9));
    var hc3 = IntrinsicsUtils.Create4UInt(words3[0], words3[4], words3[8], words3[12]);
    hc3 = Avx2.ShiftRightLogical(hc3, 6);
    hc3 = Avx2.And(hc3, And256);
    hc3 = Avx2.Add(hc3, Vector256.CreateScalar(_h3));

    // Limb 4: words at byte offset 12, shifted right by 8, OR-ed with Or256 (not masked).
    var words4 = MemoryMarshal.Cast<byte, uint>(m.Slice(12));
    var hc4 = IntrinsicsUtils.Create4UInt(words4[0], words4[4], words4[8], words4[12]);
    hc4 = Avx2.ShiftRightLogical(hc4, 8);
    hc4 = Avx2.Or(hc4, Or256);
    hc4 = Avx2.Add(hc4, Vector256.CreateScalar(_h4));

    // Output limb 0.
    var acc0 = Avx2.Multiply(_ruwy0, hc0);
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz4, hc1));
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz3, hc2));
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz2, hc3));
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz1, hc4));
    var d0 = acc0.Add4UInt64();

    // Output limb 1.
    var acc1 = Avx2.Multiply(_ruwy1, hc0);
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_ruwy0, hc1));
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_svxz4, hc2));
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_svxz3, hc3));
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_svxz2, hc4));
    var d1 = acc1.Add4UInt64();

    // Output limb 2.
    var acc2 = Avx2.Multiply(_ruwy2, hc0);
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_ruwy1, hc1));
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_ruwy0, hc2));
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_svxz4, hc3));
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_svxz3, hc4));
    var d2 = acc2.Add4UInt64();

    // Output limb 3.
    var acc3 = Avx2.Multiply(_ruwy3, hc0);
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_ruwy2, hc1));
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_ruwy1, hc2));
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_ruwy0, hc3));
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_svxz4, hc4));
    var d3 = acc3.Add4UInt64();

    // Output limb 4.
    var acc4 = Avx2.Multiply(_ruwy4, hc0);
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy3, hc1));
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy2, hc2));
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy1, hc3));
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy0, hc4));
    var d4 = acc4.Add4UInt64();

    // Carry chain: keep the low 26 bits of each sum, push the rest into the next one.
    _h0 = (uint)d0 & 0x3ffffff;
    d1 += d0 >> 26;
    _h1 = (uint)d1 & 0x3ffffff;
    d2 += d1 >> 26;
    _h2 = (uint)d2 & 0x3ffffff;
    d3 += d2 >> 26;
    _h3 = (uint)d3 & 0x3ffffff;
    d4 += d3 >> 26;
    _h4 = (uint)d4 & 0x3ffffff;

    // The overflow of the top limb is folded back into limb 0, multiplied by 5.
    _h0 += (uint)((d4 >> 26) * 5);
    _h1 += _h0 >> 26;
    _h0 &= 0x3ffffff;
}
/// <summary>
/// Updates a two-part 16-bit/16-bit checksum (low half <c>s1</c>, high half <c>s2</c>) over
/// <paramref name="buffer"/> using AVX2, 32 bytes at a time; whatever is left after the
/// 32-byte blocks is handed to <c>HandleLeftOver</c>.
/// </summary>
/// <param name="adler">Starting checksum; the low 16 bits seed <c>s1</c>, the high 16 bits seed <c>s2</c>.</param>
/// <param name="buffer">The bytes to process.</param>
/// <returns><c>s1 | (s2 &lt;&lt; 16)</c> after processing.</returns>
public static unsafe uint CalculateAvx2(uint adler, ReadOnlySpan<byte> buffer)
{
    uint s1 = adler & 0xFFFF;
    uint s2 = (adler >> 16) & 0xFFFF;
    uint remaining = (uint)buffer.Length;

    fixed (byte* bufferPtr = &MemoryMarshal.GetReference(buffer))
    {
        byte* cursor = bufferPtr;

        Vector256<byte> zero = Vector256<byte>.Zero;
        var dot3v = Vector256.Create((short)1);
        var dot2v = Vector256.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);

        // Process n blocks of data. At most NMAX data bytes can be
        // processed before s2 must be reduced modulo BASE.
        var vs1 = Vector256.CreateScalar(s1);
        var vs2 = Vector256.CreateScalar(s2);

        while (remaining >= 32)
        {
            // Take at most NMAX bytes, rounded down to a whole number of 32-byte blocks.
            int chunk = remaining < NMAX ? (int)remaining : (int)NMAX;
            chunk -= chunk % 32;
            remaining -= (uint)chunk;

            Vector256<uint> vs10 = vs1;
            Vector256<uint> vs3 = Vector256<uint>.Zero;

            for (; chunk >= 32; chunk -= 32)
            {
                // Load 32 input bytes.
                Vector256<byte> block = Avx.LoadVector256(cursor);

                // Sum of absolute differences against zero, i.e. sums of the bytes themselves.
                Vector256<ushort> vs1sad = Avx2.SumAbsoluteDifferences(block, zero);
                vs1 = Avx2.Add(vs1, vs1sad.AsUInt32());
                vs3 = Avx2.Add(vs3, vs10);

                // Weighted sum: 32 bytes -> 16 shorts.
                Vector256<short> vshortsum2 = Avx2.MultiplyAddAdjacent(block, dot2v);

                // 16 shorts -> 8 32-bit sums.
                Vector256<int> vsum2 = Avx2.MultiplyAddAdjacent(vshortsum2, dot3v);
                vs2 = Avx2.Add(vsum2.AsUInt32(), vs2);
                vs10 = vs1;

                cursor += BlockSize;
            }

            // Defer the multiplication with 32 to outside of the loop.
            vs3 = Avx2.ShiftLeftLogical(vs3, 5);
            vs2 = Avx2.Add(vs2, vs3);

            // Collapse the vectors to scalars, reduce, and re-seed the vectors.
            s1 = (uint)Numerics.EvenReduceSum(vs1.AsInt32());
            s2 = (uint)Numerics.ReduceSum(vs2.AsInt32());
            s1 %= BASE;
            s2 %= BASE;

            vs1 = Vector256.CreateScalar(s1);
            vs2 = Vector256.CreateScalar(s2);
        }

        if (remaining > 0)
        {
            HandleLeftOver(cursor, remaining, ref s1, ref s2);
        }

        return s1 | (s2 << 16);
    }
}
/// <summary>
/// Computes a distance value between a slice of <paramref name="sourceString"/> and a slice of
/// <paramref name="targetString"/> using a single rolling row of costs. Rows are processed
/// <c>VECTOR_LENGTH</c> at a time with AVX2 while enough rows remain; the remaining rows go through
/// <c>CalculateRow</c> one by one.
/// NOTE(review): the cost names (insertion/substitution) suggest a Levenshtein-style edit
/// distance — confirm against <c>CalculateRow</c>.
/// </summary>
/// <param name="sourceString">String the source slice is taken from.</param>
/// <param name="sourceLength">Number of characters in the source slice.</param>
/// <param name="targetString">String the target slice is taken from.</param>
/// <param name="targetLength">Number of characters in the target slice; also the row length.</param>
/// <param name="startIndex">Start offset used for both slices.</param>
/// <returns>The last element of the cost row after all source rows have been processed.</returns>
private static unsafe int CalculateDistance(string sourceString, int sourceLength, string targetString, int targetLength, int startIndex)
{
    var arrayPool = ArrayPool<int>.Shared;
    var pooledArray = arrayPool.Rent(targetLength);

    // The rented buffer must go back to the pool even when slicing or indexing below throws.
    try
    {
        Span<int> previousRow = pooledArray;
        ReadOnlySpan<char> source = sourceString.AsSpan().Slice(startIndex, sourceLength);
        ReadOnlySpan<char> target = targetString.AsSpan().Slice(startIndex, targetLength);

        // ArrayPool values are sometimes bigger than allocated, let's trim our span to exactly what we use
        previousRow = previousRow.Slice(0, targetLength);

        fixed (char* targetPtr = target)
        fixed (char* srcPtr = source)
        fixed (int* previousRowPtr = previousRow)
        {
            FillRow(previousRowPtr, targetLength);
            var rowIndex = 0;

            const int VECTOR_LENGTH = 16;
            for (; rowIndex < sourceLength - VECTOR_LENGTH - 1; rowIndex += VECTOR_LENGTH)
            {
                // todo max
                var temp = Vector128.Create(rowIndex);
                var diag = Sse42.PackUnsignedSaturate(temp, temp).ToVector256();
                var one = Vector256.Create((ushort)1);
                var left = Avx2.AddSaturate(diag, one);
                var sourceV = Avx2.LoadVector256((ushort*)(srcPtr + rowIndex));
                var targetV = Vector256<ushort>.Zero;
                var shift = Vector256.CreateScalar(ushort.MaxValue);

                // The first VECTOR_LENGTH - 1 iterations fill the vector
                for (int columnIndex = 0; columnIndex < VECTOR_LENGTH - 1; columnIndex++)
                {
                    // Shift in the next character (OR it into the first element after the shift)
                    targetV = ShiftLeft(targetV);
                    targetV = Avx2.Or(targetV, Vector256.CreateScalar((ushort)targetPtr[columnIndex]));

                    // Insert "(rowIndex + columnIndex + 1)" from the left
                    var leftValue = Vector256.Create(rowIndex + columnIndex + 1);
                    left = Avx2.Or(Avx2.And(shift, Avx2.PackUnsignedSaturate(leftValue, leftValue)), left);
                    shift = ShiftLeft(shift);

                    // Compare source to target: cost 0 on match, 1 otherwise
                    // (alternative: compare equal and OR with one)
                    var match = Avx2.CompareEqual(sourceV, targetV);
                    var add = Avx2.AndNot(match, one);
                    var next = Avx2.AddSaturate(diag, add);

                    // Create next diag which is current up
                    var up = ShiftLeft(left);
                    up = Avx2.Or(up, Vector256.CreateScalar((ushort)previousRowPtr[columnIndex]));

                    var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                    next = Avx2.Min(next, tmp);

                    left = next;
                    diag = up;
                }

                var writePtr = previousRowPtr;
                *writePtr = left.GetElement(VECTOR_LENGTH - 1);
                writePtr++;

                for (int columnIndex = VECTOR_LENGTH; columnIndex < targetLength; columnIndex++)
                {
                    // Shift in the next character
                    targetV = ShiftLeft(targetV);
                    targetV = Avx2.Or(targetV, Vector256.CreateScalar((ushort)targetPtr[columnIndex]));

                    // Compare source to target: cost 0 on match, 1 otherwise
                    var match = Avx2.CompareEqual(sourceV, targetV);
                    var add = Avx2.AndNot(match, one);
                    var next = Avx2.AddSaturate(diag, add);

                    // Create next diag which is current up
                    var up = ShiftLeft(left);
                    up = Avx2.Or(up, Vector256.CreateScalar((ushort)previousRowPtr[columnIndex]));

                    var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                    next = Avx2.Min(next, tmp);

                    left = next;
                    diag = up;

                    // Store one value
                    *writePtr = next.GetElement(VECTOR_LENGTH - 1);
                    writePtr++;
                }

                // Finish with the last VECTOR_LENGTH - 1 items; don't read any more chars, just extract them
                for (int i = targetLength - (VECTOR_LENGTH - 1); i < previousRow.Length; i++)
                {
                    // Shift the target characters along without adding a new one
                    targetV = ShiftLeft(targetV);

                    // Compare source to target: cost 0 on match, 1 otherwise
                    var match = Avx2.CompareEqual(sourceV, targetV);
                    var add = Avx2.AndNot(match, one);
                    var next = Avx2.AddSaturate(diag, add);

                    // Create next diag which is current up
                    var up = ShiftLeft(left);

                    var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                    next = Avx2.Min(next, tmp);

                    left = next;
                    diag = up;

                    // Store one value
                    previousRowPtr[i] = left.GetElement(VECTOR_LENGTH - 1);
                }

#if DEBUG
                if (true)
                {
                    Console.Write("prev values for row {0}:", rowIndex);
                    for (int i = 0; i < targetLength; ++i)
                    {
                        Console.Write("{0} ", previousRow[i]);
                    }
                    Console.WriteLine();
                }
#endif
            }

            // Calculate single rows
            for (; rowIndex < sourceLength; rowIndex++)
            {
                var lastSubstitutionCost = rowIndex;
                var lastInsertionCost = rowIndex + 1;
                var sourcePrevChar = source[rowIndex];

#if DEBUG
                Console.Write("prev values for row {0}:", rowIndex);
                for (int i = 0; i < targetLength; ++i)
                {
                    Console.Write("{0} ", previousRow[i]);
                }
                Console.WriteLine();
#endif

                CalculateRow(previousRowPtr, targetPtr, targetLength, sourcePrevChar, lastInsertionCost, lastSubstitutionCost);
            }
        }

        // Read the result before the buffer is handed back in the finally block.
        return previousRow[targetLength - 1];
    }
    finally
    {
        arrayPool.Return(pooledArray);
    }
}