/// <summary>
/// Byte-swaps <paramref name="len"/> 64-bit values read from <paramref name="source"/>
/// and writes them to <paramref name="dest"/>.
/// </summary>
/// <param name="source">Pointer to the first value to read.</param>
/// <param name="dest">Pointer to the first slot to write.</param>
/// <param name="len">Number of <see cref="ulong"/> elements to process.</param>
private static void ReverseEndianess(ulong* source, ulong* dest, int len)
{
    int lanes = Vector128<ulong>.Count;

    // Scalar path: no SSSE3, or fewer elements than a single vector holds.
    if (!Ssse3.IsSupported || len < lanes)
    {
        for (int k = 0; k < len; ++k)
        {
            dest[k] = BinaryPrimitives.ReverseEndianness(source[k]);
        }

        return;
    }

    // Vector path: byte-shuffle one full vector per iteration.
    int offset = 0;
    while (len - offset >= lanes)
    {
        Vector128<byte> chunk = Sse2.LoadVector128(source + offset).AsByte();
        Vector128<ulong> swapped = Ssse3.Shuffle(chunk, ReverseEndianess_64_128).AsUInt64();
        Sse2.Store(dest + offset, swapped);
        offset += lanes;
    }

    // The loop stops once fewer than one vector's worth of elements remain;
    // a single leftover element is swapped with the scalar helper.
    if (offset != len)
    {
        dest[offset] = BinaryPrimitives.ReverseEndianness(source[offset]);
    }
}
/// <summary>
/// Byte-swaps <paramref name="len"/> 64-bit values in place, starting at <paramref name="ptr"/>.
/// </summary>
/// <param name="ptr">Pointer to the first value; each element is overwritten with its byte-swapped form.</param>
/// <param name="len">Number of <see cref="ulong"/> elements to process.</param>
private static void ReverseEndianess(ulong* ptr, int len)
{
    // Scalar path: no SSSE3, or fewer than 4 elements.
    if (!Ssse3.IsSupported || len < 4)
    {
        for (int k = 0; k < len; ++k)
        {
            ptr[k] = BinaryPrimitives.ReverseEndianness(ptr[k]);
        }

        return;
    }

    int lanes = Vector128<ulong>.Count;
    int offset = 0;

    // Vector path: load, byte-shuffle and store back one vector per iteration.
    while (len - offset >= lanes)
    {
        ulong* cursor = ptr + offset;
        Vector128<byte> chunk = Sse2.LoadVector128(cursor).AsByte();
        Sse2.Store(cursor, Ssse3.Shuffle(chunk, ReverseEndianess_64_128).AsUInt64());
        offset += lanes;
    }

    // A single element can be left over when len is odd.
    if (offset != len)
    {
        ptr[offset] = BinaryPrimitives.ReverseEndianness(ptr[offset]);
    }
}
/// <summary>
/// Combines the vectors into a single one with the same layout
/// 00,01,02,03,04,10,11,12,13,14,__,__,__,k0,k1,cc
/// </summary>
/// <param name="a">First input vector.</param>
/// <param name="b">Second input vector.</param>
/// <returns>The byte-wise maximum over all candidate sums built from <paramref name="a"/> and <paramref name="b"/>.</returns>
private static Vector128<byte> CalculatePhase1(Vector128<byte> a, Vector128<byte> b)
{
    // Strategy: compute every candidate sum first, then fold them together with
    // repeated vertical max operations.
    // The shuffle masks are created inline from two 64-bit halves on purpose:
    // that is faster than loading them from static fields.
    // A mask byte of 0xFF has its high bit set, so PSHUFB writes 0 to that lane.

    // Round 1: two sets of a+b sums, merged by max.
    Vector128<byte> sumsFirst = Sse2.Add(
        Ssse3.Shuffle(a, Vector128.Create(0x01_03_02_01_04_03_02_01UL, 0xFF_0D_01_02_01_01_02_01UL).AsByte()),
        Ssse3.Shuffle(b, Vector128.Create(0x01_05_06_07_05_06_07_08UL, 0xFF_0E_02_02_03_05_05_06UL).AsByte()));

    Vector128<byte> sumsSecond = Sse2.Add(
        Ssse3.Shuffle(a, Vector128.Create(0xFF_07_06_05_08_07_06_05UL, 0xFF_0E_02_FF_03_05_06_05UL).AsByte()),
        Ssse3.Shuffle(b, Vector128.Create(0xFF_01_02_03_01_02_03_04UL, 0xFF_0D_01_FF_01_01_01_02UL).AsByte()));

    Vector128<byte> maxRound1 = Sse2.Max(sumsFirst, sumsSecond);

    // Round 2: regroup the round-1 maxima into two vectors and merge again.
    Vector128<byte> maxRound2 = Sse2.Max(
        Ssse3.Shuffle(maxRound1, Vector128.Create(0x07_0D_0B_FF_0A_08_01_00UL, 0xFF_0E_FF_FF_06_04_FF_FFUL).AsByte()),
        Ssse3.Shuffle(maxRound1, Vector128.Create(0xFF_FF_0C_FF_FF_09_03_02UL, 0xFF_FF_FF_FF_FF_05_FF_FFUL).AsByte()));

    // Round 3: final regrouping; the two halves are merged in the return expression.
    Vector128<byte> finalLeft = Ssse3.Shuffle(maxRound2, Vector128.Create(0x02_03_FF_05_06_07_FF_FFUL, 0xFF_0E_FF_FF_FF_0F_00_0AUL).AsByte());
    Vector128<byte> finalRight = Ssse3.Shuffle(maxRound2, Vector128.Create(0xFF_FF_FF_FF_FF_FF_FF_FFUL, 0xFF_FF_FF_FF_FF_FF_01_0BUL).AsByte());

    // Calculates the chiitoitsu sum and the kokushi-without-pair sum separately;
    // there was just not enough room for them in the shuffles above.
    Vector128<byte> selectedFromB = Sse2.And(b, Vector128.Create(0UL, 0xFF_00_FF_00_00_00_00_00UL).AsByte());
    Vector128<byte> aPlusSelected = Sse2.Add(a, selectedFromB);

    return Sse2.Max(Sse2.Max(aPlusSelected, b), Sse2.Max(finalLeft, finalRight));
}
/// <summary>
/// Shuffles the instance fields <c>_fld1</c> and <c>_fld2</c>, stores the result in the output
/// buffer owned by <paramref name="testClass"/> and has it validate the result.
/// </summary>
/// <param name="testClass">Test instance providing the output buffer and the validation routine.</param>
public void RunStructFldScenario(SimpleBinaryOpTest__ShuffleSByte testClass)
{
    // The fields are passed directly to the intrinsic (no local copies).
    var shuffled = Ssse3.Shuffle(_fld1, _fld2);

    var destination = testClass._dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    testClass.ValidateResult(_fld1, _fld2, destination);
}
/// <summary>
/// Shuffles the instance fields <c>_fld1</c> and <c>_fld2</c>, writes the result to the
/// data table's output buffer and validates it.
/// </summary>
public void RunClassFldScenario()
{
    // The fields are passed directly to the intrinsic (no local copies).
    var shuffled = Ssse3.Shuffle(_fld1, _fld2);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(_fld1, _fld2, destination);
}
/// <summary>
/// Parses a number from the 16 bytes at <paramref name="p"/> using SIMD and reports
/// how many lanes failed the range check.
/// </summary>
/// <param name="p">Pointer to at least 16 readable bytes of input.</param>
/// <param name="cnt">Index into the per-count multiplier/comparison tables (presumably the digit count; confirm against callers).</param>
/// <param name="n">Receives the sum of all lanes after multiplying by the <c>mul0Array</c>/<c>mul1Array</c> entries.</param>
/// <returns>
/// The sum of the comparison masks. Each lane whose scaled value is greater than the
/// corresponding <c>_9</c> lane contributes -1, so 0 means no lane was out of range.
/// </returns>
public int TryParseSIMDUseCount(byte* p, int cnt, out int n)
{
    var tmp = Sse2.LoadVector128(p);
    var tmp1 = Sse.StaticCast<byte, sbyte>(tmp);
    tmp1 = Sse2.Subtract(tmp1, subtmp);

    // Split the input into two groups of lanes.
    var data0 = Ssse3.Shuffle(tmp1, mask0);
    var data1 = Ssse3.Shuffle(tmp1, mask1);

    // Scale each group by its table entry and add the two groups together.
    var mul0 = Sse41.MultiplyLow(Sse.StaticCast<sbyte, int>(data0), mul0Array[cnt]);
    var mul1 = Sse41.MultiplyLow(Sse.StaticCast<sbyte, int>(data1), mul1Array[cnt]);
    var x = Sse2.Add(mul0, mul1);

    // Two horizontal adds leave the total of all four lanes in every lane.
    x = Ssse3.HorizontalAdd(x, x);
    x = Ssse3.HorizontalAdd(x, x);
    n = Sse41.Extract(x, 3);

    // Range check. CompareGreaterThan yields all-ones (-1) in each lane that exceeds the limit.
    var com0 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(Sse.StaticCast<sbyte, int>(data0), cmp0Array[cnt]), _9);
    // Fix: the second comparison must inspect data1. It previously re-checked data0,
    // so the lanes held in data1 were never validated.
    var com1 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(Sse.StaticCast<sbyte, int>(data1), cmp1Array[cnt]), _9);

    var xx = Sse2.Add(com0, com1);
    xx = Ssse3.HorizontalAdd(xx, xx);
    xx = Ssse3.HorizontalAdd(xx, xx);
    return Sse41.Extract(xx, 3);
}
/// <summary> /// Searches for an opening character from a registered parser in the specified string. /// </summary> /// <param name="text">The text.</param> /// <param name="start">The start.</param> /// <param name="end">The end.</param> /// <returns>Index position within the string of the first opening character found in the specified text; if not found, returns -1</returns> public int IndexOfOpeningCharacter(string text, int start, int end) { Debug.Assert(text is not null); Debug.Assert(start >= 0 && end >= 0); Debug.Assert(end - start + 1 >= 0); Debug.Assert(end - start + 1 <= text.Length); if (nonAsciiMap is null) { #if NETCOREAPP3_1_OR_GREATER if (Ssse3.IsSupported && BitConverter.IsLittleEndian) { // Based on http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm // Optimized for sets in the [1, 127] range int lengthMinusOne = end - start; int charsToProcessVectorized = lengthMinusOne & ~(2 * Vector128 <short> .Count - 1); int finalStart = start + charsToProcessVectorized; if (start < finalStart) { ref char textStartRef = ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start); Vector128 <byte> bitmap = _asciiBitmap; do { // Load 32 bytes (16 chars) into two Vector128<short>s (chars) // Drop the high byte of each char // Pack the remaining bytes into a single Vector128<byte> Vector128 <byte> input = Sse2.PackUnsignedSaturate( Unsafe.ReadUnaligned <Vector128 <short> >(ref Unsafe.As <char, byte>(ref textStartRef)), Unsafe.ReadUnaligned <Vector128 <short> >(ref Unsafe.As <char, byte>(ref Unsafe.Add(ref textStartRef, Vector128 <short> .Count)))); // Extract the higher nibble of each character ((input >> 4) & 0xF) Vector128 <byte> higherNibbles = Sse2.And(Sse2.ShiftRightLogical(input.AsUInt16(), 4).AsByte(), Vector128.Create((byte)0xF)); // Lookup the matching higher nibble for each character based on the lower nibble // PSHUFB will set the result to 0 for any non-ASCII (> 127) character Vector128 <byte> bitsets = Ssse3.Shuffle(bitmap, 
input); // Calculate a bitmask (1 << (higherNibble % 8)) for each character Vector128 <byte> bitmask = Ssse3.Shuffle(Vector128.Create(0x8040201008040201).AsByte(), higherNibbles); // Check which characters are present in the set // We are relying on bitsets being zero for non-ASCII characters Vector128 <byte> result = Sse2.And(bitsets, bitmask); if (!result.Equals(Vector128 <byte> .Zero)) { int resultMask = ~Sse2.MoveMask(Sse2.CompareEqual(result, Vector128 <byte> .Zero)); return(start + BitOperations.TrailingZeroCount((uint)resultMask)); } start += 2 * Vector128 <short> .Count; textStartRef = ref Unsafe.Add(ref textStartRef, 2 * Vector128 <short> .Count); }while (start != finalStart); } } ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
/// <summary>
/// Creates a fresh <c>SimpleBinaryOpTest__ShuffleSByte</c>, shuffles its two vector fields,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunClassLclFldScenario()
{
    var instance = new SimpleBinaryOpTest__ShuffleSByte();

    // Operands are the fields of the local class instance, passed directly.
    var shuffled = Ssse3.Shuffle(instance._fld1, instance._fld2);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(instance._fld1, instance._fld2, destination);
}
/// <summary>
/// Creates a <c>TestStruct</c> via <c>TestStruct.Create()</c>, shuffles its two vector fields,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    var localStruct = TestStruct.Create();

    // Operands are the fields of the local struct, passed directly.
    var shuffled = Ssse3.Shuffle(localStruct._fld1, localStruct._fld2);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(localStruct._fld1, localStruct._fld2, destination);
}
/// <summary>
/// Derives an <see cref="int"/> from the packed 64-bit <paramref name="input"/> with a byte shuffle.
/// </summary>
/// <param name="input">Packed input; <c>ShortCharA</c> is subtracted from it before any other step.</param>
/// <param name="value">Receives lane 0 (as a 32-bit integer) of the shuffled vector. It is always assigned, even when the method returns <c>false</c>.</param>
/// <returns><c>true</c> when none of the bits selected by <c>ShortN15</c> are set in the offset input.</returns>
private unsafe bool TryParseInt(long input, out int value)
{
    var offsetInput = input - ShortCharA;

    // Validity is decided before the bits are rearranged below.
    bool withinRange = (offsetInput & ShortN15) == 0;

    // Combine a copy shifted left by 4 with a copy shifted right by 8 (logical shifts).
    ulong bits = (ulong)offsetInput;
    offsetInput = (long)((bits << 4) | (bits >> 8));

    // NOTE(review): NShuffleMask is defined elsewhere; presumably it gathers the
    // relevant bytes into the low 32 bits — confirm against its definition.
    var shuffled = Ssse3.Shuffle(Vector128.CreateScalar(offsetInput).AsSByte(), NShuffleMask);
    value = Sse41.Extract(shuffled.AsInt32(), 0);

    return withinRange;
}
/// <summary>
/// Loads both operands into locals with aligned loads, shuffles them, writes the result
/// to the data table's output buffer and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    var firstOperand = Sse2.LoadAlignedVector128((SByte*)(_dataTable.inArray1Ptr));
    var secondOperand = Sse2.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr));

    var shuffled = Ssse3.Shuffle(firstOperand, secondOperand);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(firstOperand, secondOperand, destination);
}
/// <summary>
/// Announces the scenario, shuffles the instance fields <c>_fld1</c> and <c>_fld2</c>,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    // The fields are passed directly to the intrinsic (no local copies).
    var shuffled = Ssse3.Shuffle(_fld1, _fld2);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(_fld1, _fld2, destination);
}
/// <summary>
/// Reads both operands into locals with <c>Unsafe.Read</c>, shuffles them, writes the result
/// to the data table's output buffer and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    var firstOperand = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr);
    var secondOperand = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);

    var shuffled = Ssse3.Shuffle(firstOperand, secondOperand);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(firstOperand, secondOperand, destination);
}
/// <summary>
/// Shuffles <c>_clsVar1</c> and <c>_clsVar2</c>, writes the result to the data table's
/// output buffer and validates it.
/// </summary>
public void RunClsVarScenario()
{
    // The variables are passed directly to the intrinsic (no local copies).
    var shuffled = Ssse3.Shuffle(_clsVar1, _clsVar2);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(_clsVar1, _clsVar2, destination);
}
/// <summary>
/// Shuffles two operands produced by aligned loads nested directly in the call, writes the
/// result to the data table's output buffer and validates it against the raw input buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    // The loads stay nested inside the Shuffle call on purpose; no operand locals.
    var shuffled = Ssse3.Shuffle(
        Sse2.LoadAlignedVector128((SByte*)(_dataTable.inArray1Ptr)),
        Sse2.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)));

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, destination);
}
/// <summary>
/// Shuffles two operands produced by <c>Unsafe.Read</c> calls nested directly in the call,
/// writes the result to the data table's output buffer and validates it against the raw
/// input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    // The reads stay nested inside the Shuffle call on purpose; no operand locals.
    var shuffled = Ssse3.Shuffle(
        Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr));

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, destination);
}
/// <summary>
/// Announces the scenario, creates a fresh <c>SimpleBinaryOpTest__ShuffleByte</c>, shuffles its
/// two vector fields, writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var instance = new SimpleBinaryOpTest__ShuffleByte();

    // Operands are the fields of the local class instance, passed directly.
    var shuffled = Ssse3.Shuffle(instance._fld1, instance._fld2);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(instance._fld1, instance._fld2, destination);
}
/// <summary>
/// Announces the scenario, creates a <c>TestStruct</c> via <c>TestStruct.Create()</c>, shuffles
/// its two vector fields, writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var localStruct = TestStruct.Create();

    // Operands are the fields of the local struct, passed directly.
    var shuffled = Ssse3.Shuffle(localStruct._fld1, localStruct._fld2);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(localStruct._fld1, localStruct._fld2, destination);
}
/// <summary>
/// Announces the scenario, loads both operands into locals with aligned loads, shuffles them,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    var firstOperand = Sse2.LoadAlignedVector128((SByte*)(_dataTable.inArray1Ptr));
    var secondOperand = Sse2.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr));

    var shuffled = Ssse3.Shuffle(firstOperand, secondOperand);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(firstOperand, secondOperand, destination);
}
/// <summary>
/// Announces the scenario, reads both operands into locals with <c>Unsafe.Read</c>, shuffles
/// them, writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    var firstOperand = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr);
    var secondOperand = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);

    var shuffled = Ssse3.Shuffle(firstOperand, secondOperand);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(firstOperand, secondOperand, destination);
}
/// <summary>
/// Hex-encodes <paramref name="bytes"/> into <paramref name="chars"/> as UTF-16, converting four
/// input bytes (eight output chars) per iteration with SSSE3, then finishing the remaining
/// bytes one at a time via <c>ToCharsBuffer</c>.
/// </summary>
/// <param name="bytes">Input bytes; the method asserts that at least 4 are supplied.</param>
/// <param name="chars">Destination; two chars are written for every input byte.</param>
/// <param name="casing">Selects upper- or lower-case hex digits.</param>
private static void EncodeToUtf16_Ssse3(ReadOnlySpan<byte> bytes, Span<char> chars, Casing casing)
{
    Debug.Assert(bytes.Length >= 4);

    ref byte source = ref MemoryMarshal.GetReference(bytes);
    ref char destination = ref MemoryMarshal.GetReference(chars);

    // Places input byte k at byte offset 4*k + 2 of the vector and zeroes every other byte
    // (a 0xFF selector has its high bit set, so PSHUFB writes 0 there).
    Vector128<byte> shuffleMask = Vector128.Create(
        0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF, 1, 0xFF,
        0xFF, 0xFF, 2, 0xFF, 0xFF, 0xFF, 3, 0xFF);

    // Lookup table mapping a nibble value (0..15) to its ASCII hex digit.
    Vector128<byte> asciiTable;
    if (casing == Casing.Upper)
    {
        asciiTable = Vector128.Create((byte)'0', (byte)'1', (byte)'2', (byte)'3',
                                      (byte)'4', (byte)'5', (byte)'6', (byte)'7',
                                      (byte)'8', (byte)'9', (byte)'A', (byte)'B',
                                      (byte)'C', (byte)'D', (byte)'E', (byte)'F');
    }
    else
    {
        asciiTable = Vector128.Create((byte)'0', (byte)'1', (byte)'2', (byte)'3',
                                      (byte)'4', (byte)'5', (byte)'6', (byte)'7',
                                      (byte)'8', (byte)'9', (byte)'a', (byte)'b',
                                      (byte)'c', (byte)'d', (byte)'e', (byte)'f');
    }

    Vector128<byte> nibbleMask = Vector128.Create((byte)0xF);
    Vector128<byte> lowByteOfEachChar = Vector128.Create((ushort)0xFF).AsByte();

    nint pos = 0;
    do
    {
        // Read 32 bits from "bytes" at offset "pos".
        uint block = Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref source, pos));

        // Spread the four bytes out; these lanes supply the low nibbles.
        Vector128<byte> lowNibbles = Ssse3.Shuffle(Vector128.CreateScalarUnsafe(block).AsByte(), shuffleMask);

        // Move each byte down two positions and shift right by four bits,
        // so its high nibble lands in the low-nibble position.
        Vector128<byte> shiftedDown = Sse2.ShiftRightLogical128BitLane(lowNibbles, 2);
        Vector128<byte> highNibbles = Sse2.ShiftRightLogical(shiftedDown.AsInt32(), 4).AsByte();

        // Merge both nibble streams and keep only the nibble bits as table indices.
        Vector128<byte> indices = Sse2.And(Sse2.Or(lowNibbles, highNibbles), nibbleMask);

        // Look up the hex digit for every index.
        Vector128<byte> hex = Ssse3.Shuffle(asciiTable, indices);

        // The high byte (0x00) of each char was also translated to ASCII '0'; clear it again.
        hex = Sse2.And(hex, lowByteOfEachChar);

        // Store eight chars at char offset pos * 2.
        Unsafe.WriteUnaligned(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref destination, pos * 2)), hex);

        pos += 4;
    }
    while (pos < bytes.Length - 3);

    // Process the trailing elements (bytes.Length % 4) one byte at a time.
    while (pos < bytes.Length)
    {
        ToCharsBuffer(Unsafe.Add(ref source, pos), chars, (int)pos * 2, casing);
        pos++;
    }
}
/// <summary>
/// Announces the scenario, loads both operands into locals with unaligned loads, shuffles them,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    var left = Sse2.LoadVector128((SByte*)(_dataTable.inArray1Ptr));
    var right = Sse2.LoadVector128((SByte*)(_dataTable.inArray2Ptr));

    var shuffled = Ssse3.Shuffle(left, right);

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(left, right, destination);
}
/// <summary>
/// Announces the scenario, shuffles two operands produced by <c>Unsafe.Read</c> calls nested
/// directly in the call, writes the result to the data table's output buffer and validates it
/// against the raw input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    // The reads stay nested inside the Shuffle call on purpose; no operand locals.
    var shuffled = Ssse3.Shuffle(
        Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<Byte>>(_dataTable.inArray2Ptr));

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, destination);
}
/// <summary>
/// Computes the shanten value from the <c>_a</c> and <c>_b</c> vectors.
/// </summary>
/// <returns>
/// The smallest byte of <c>_inversionVector - (_a + shuffle(_b, _reverseBVector))</c>, minus one.
/// </returns>
public int Shanten()
{
    // Reorder _b with the configured shuffle, then add it to _a lane by lane.
    var reorderedB = Ssse3.Shuffle(_b, _reverseBVector);
    var combined = Sse2.Add(_a, reorderedB);
    var inverted = Sse2.Subtract(_inversionVector, combined);

    // Fold each 16-bit lane: bring its high byte down next to the low byte and take the
    // byte-wise minimum, so the low byte of every 16-bit lane holds min(low, high).
    var highBytes = Sse2.ShiftRightLogical(inverted.AsInt16(), 8);
    var pairMin = Sse2.Min(inverted, highBytes.AsByte());

    // MinHorizontal puts the smallest 16-bit lane into element 0; the cast to byte keeps
    // only the low byte of the extracted value.
    var smallestLane = Sse41.MinHorizontal(pairMin.AsUInt16());
    var smallest = (byte)Sse2.ConvertToInt32(smallestLane.AsInt32());

    return smallest - 1;
}
/// <summary>
/// Announces the scenario, shuffles two operands produced by aligned loads nested directly in
/// the call, writes the result to the data table's output buffer and validates it against the
/// raw input buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    // The loads stay nested inside the Shuffle call on purpose; no operand locals.
    var shuffled = Ssse3.Shuffle(
        Sse2.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
        Sse2.LoadAlignedVector128((Byte*)(_dataTable.inArray2Ptr)));

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, destination);
}
/// <summary>
/// Announces the scenario, creates a <c>TestStruct</c> via <c>TestStruct.Create()</c>, loads its
/// two vector fields through their addresses, shuffles them, writes the result to the data
/// table's output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    // Operands are loaded from the addresses of the local struct's fields,
    // nested directly inside the Shuffle call.
    var shuffled = Ssse3.Shuffle(
        Sse2.LoadVector128((Byte*)(&localStruct._fld1)),
        Sse2.LoadVector128((Byte*)(&localStruct._fld2)));

    var destination = _dataTable.outArrayPtr;
    Unsafe.Write(destination, shuffled);
    ValidateResult(localStruct._fld1, localStruct._fld2, destination);
}
/// <summary>
/// Builds a per-byte mask telling which bytes of <paramref name="sourceValue"/> need escaping,
/// using a nibble-indexed bitmask lookup.
/// </summary>
/// <param name="sourceValue">The 16 input bytes to classify.</param>
/// <param name="bitMaskLookup">Table indexed by low nibble; each entry is a bitmask over high nibbles.</param>
/// <param name="bitPosLookup">Table indexed by high nibble; each entry is the bit(s) to test in the row's bitmask.</param>
/// <param name="nibbleMaskSByte">Mask applied to isolate a nibble in every byte.</param>
/// <param name="nullMaskSByte">Vector the intermediate result is compared against (presumably all zeros; confirm with callers).</param>
/// <returns>
/// A vector whose bytes are all-ones where the looked-up bit test differs from
/// <paramref name="nullMaskSByte"/>, and zero elsewhere.
/// </returns>
public static Vector128<sbyte> CreateEscapingMask(
    Vector128<sbyte> sourceValue,
    Vector128<sbyte> bitMaskLookup,
    Vector128<sbyte> bitPosLookup,
    Vector128<sbyte> nibbleMaskSByte,
    Vector128<sbyte> nullMaskSByte)
{
    // Whether an input byte must be escaped is decided by a bitmask lookup. The byte is split
    // into its low and high nibble, which act as the row and column index into the bitmask.
    // The lookup looks like this (example: s_bitMaskLookupBasicLatin):
    //
    //                                     high-nibble
    // low-nibble  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    //         0   1   1   0   0   0   0   1   0   1   1   1   1   1   1   1   1
    //         1   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         2   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         3   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         4   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         5   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         6   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         7   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         8   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         9   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         A   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         B   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         C   1   1   0   1   0   1   0   0   1   1   1   1   1   1   1   1
    //         D   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
    //         E   1   1   0   1   0   0   0   0   1   1   1   1   1   1   1   1
    //         F   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1   1
    //
    // 1 marks a byte that needs escaping, 0 one that does not.
    // Every input with a high nibble in 8..F needs escaping, so those columns can be left out
    // of the bitmask. Only high nibbles 0..7 have to be stored, which is why each row fits
    // into a single byte.
    //
    // For each row (= low nibble) the table holds one bitmask over the columns (= high nibbles).

    Debug.Assert(Ssse3.IsSupported);

    // Row index: the low four bits of every byte.
    Vector128<sbyte> rowIndex = Sse2.And(sourceValue, nibbleMaskSByte);

    // Column index: there is no per-byte shift, so shift 32-bit lanes right by four and mask
    // away the bits that crossed over from the neighbouring byte.
    Vector128<sbyte> shiftedSource = Sse2.ShiftRightLogical(sourceValue.AsInt32(), 4).AsSByte();
    Vector128<sbyte> columnIndex = Sse2.And(shiftedSource, nibbleMaskSByte);

    // Fetch the row's bitmask and the bit that represents the column, then test the bit.
    Vector128<sbyte> rowBits = Ssse3.Shuffle(bitMaskLookup, rowIndex);
    Vector128<sbyte> columnBit = Ssse3.Shuffle(bitPosLookup, columnIndex);
    Vector128<sbyte> hit = Sse2.And(columnBit, rowBits);

    // Normalize: the inner compare is all-ones where the bit test equals the null mask; the
    // outer compare inverts that (given an all-zero null mask), yielding all-ones for hits.
    Vector128<sbyte> equalsNull = Sse2.CompareEqual(nullMaskSByte, hit);
    return Sse2.CompareEqual(nullMaskSByte, equalsNull);
}
/// <summary>
/// Pins the instance fields <c>_fld1</c> and <c>_fld2</c>, loads them through the pinned
/// pointers, shuffles them, stores the result in the output buffer owned by
/// <paramref name="testClass"/> and has it validate the result.
/// </summary>
/// <param name="testClass">Test instance providing the output buffer and the validation routine.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__ShuffleByte testClass)
{
    fixed (Vector128<Byte>* pinnedFirst = &_fld1)
    fixed (Vector128<Byte>* pinnedSecond = &_fld2)
    {
        // The loads stay nested inside the Shuffle call; no operand locals.
        var shuffled = Ssse3.Shuffle(
            Sse2.LoadVector128((Byte*)(pinnedFirst)),
            Sse2.LoadVector128((Byte*)(pinnedSecond)));

        var destination = testClass._dataTable.outArrayPtr;
        Unsafe.Write(destination, shuffled);
        testClass.ValidateResult(_fld1, _fld2, destination);
    }
}
/// <summary>
/// Computes one vector-wide section of a cost diagonal: for each lane it takes the minimum of
/// the insert, delete and substitution costs, adds one, and stores the result into the first
/// diagonal buffer.
/// </summary>
/// <typeparam name="T">Element type of the diagonal buffers; only <see cref="int"/> and <see cref="ushort"/> are handled, any other type is a no-op.</typeparam>
/// <param name="refDiag1Ptr">Diagonal buffer that supplies the substitution base cost and receives the result.</param>
/// <param name="refDiag2Ptr">Diagonal buffer that supplies the insert and delete costs.</param>
/// <param name="sourcePtr">Source characters, read ending just before <paramref name="rowIndex"/>.</param>
/// <param name="targetPtr">Target characters, read starting at <paramref name="columnIndex"/> - 1 and reversed.</param>
/// <param name="rowIndex">Row index of the section; only read here, never written.</param>
/// <param name="columnIndex">Column index of the section.</param>
public static unsafe void CalculateDiagonalSection_Sse41<T>(void* refDiag1Ptr, void* refDiag2Ptr, char* sourcePtr, char* targetPtr, ref int rowIndex, int columnIndex) where T : struct
{
    if (typeof(T) == typeof(int))
    {
        int* diag1 = (int*)refDiag1Ptr;
        int* diag2 = (int*)refDiag2Ptr;

        // Widen four UTF-16 code units from each string to 32-bit lanes.
        Vector128<int> sourceChars = Sse41.ConvertToVector128Int32((ushort*)sourcePtr + rowIndex - Vector128<T>.Count);
        Vector128<int> targetChars = Sse41.ConvertToVector128Int32((ushort*)targetPtr + columnIndex - 1);

        // 0x1b = 0b00_01_10_11 reverses the order of the four 32-bit lanes.
        targetChars = Sse2.Shuffle(targetChars, 0x1b);

        // Equal lanes compare to all-ones (-1), which cancels the +1 added below.
        Vector128<int> matchAdjustment = Sse2.CompareEqual(sourceChars, targetChars);

        Vector128<int> insertCost = Sse3.LoadDquVector128(diag2 + rowIndex - Vector128<T>.Count);
        Vector128<int> deleteCost = Sse3.LoadDquVector128(diag2 + rowIndex - (Vector128<T>.Count - 1));
        Vector128<int> substitutionCost = Sse2.Add(
            Sse3.LoadDquVector128(diag1 + rowIndex - Vector128<T>.Count),
            matchAdjustment);

        Vector128<int> best = Sse41.Min(Sse41.Min(insertCost, deleteCost), substitutionCost);
        best = Sse2.Add(best, Vector128.Create(1));

        Sse2.Store(diag1 + rowIndex - (Vector128<T>.Count - 1), best);
    }
    else if (typeof(T) == typeof(ushort))
    {
        ushort* diag1 = (ushort*)refDiag1Ptr;
        ushort* diag2 = (ushort*)refDiag2Ptr;

        // Eight UTF-16 code units fit a vector directly; no widening needed.
        Vector128<ushort> sourceChars = Sse3.LoadDquVector128((ushort*)sourcePtr + rowIndex - Vector128<T>.Count);
        Vector128<ushort> targetChars = Sse3.LoadDquVector128((ushort*)targetPtr + columnIndex - 1);

        // Reverse the order of the 16-bit lanes with a byte shuffle.
        targetChars = Ssse3.Shuffle(targetChars.AsByte(), REVERSE_USHORT_AS_BYTE_128).AsUInt16();

        // Equal lanes compare to all-ones (0xFFFF); adding that wraps to -1, cancelling the +1 below.
        Vector128<ushort> matchAdjustment = Sse2.CompareEqual(sourceChars, targetChars);

        Vector128<ushort> insertCost = Sse3.LoadDquVector128(diag2 + rowIndex - Vector128<T>.Count);
        Vector128<ushort> deleteCost = Sse3.LoadDquVector128(diag2 + rowIndex - (Vector128<T>.Count - 1));
        Vector128<ushort> substitutionCost = Sse2.Add(
            Sse3.LoadDquVector128(diag1 + rowIndex - Vector128<T>.Count),
            matchAdjustment);

        Vector128<ushort> best = Sse41.Min(Sse41.Min(insertCost, deleteCost), substitutionCost);
        best = Sse2.Add(best, Vector128.Create((ushort)1));

        Sse2.Store(diag1 + rowIndex - (Vector128<T>.Count - 1), best);
    }
}
/// <summary>
/// Announces the scenario, pins the instance fields <c>_fld1</c> and <c>_fld2</c>, loads them
/// through the pinned pointers, shuffles them, writes the result to the data table's output
/// buffer and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Byte>* pinnedFirst = &_fld1)
    fixed (Vector128<Byte>* pinnedSecond = &_fld2)
    {
        // The loads stay nested inside the Shuffle call; no operand locals.
        var shuffled = Ssse3.Shuffle(
            Sse2.LoadVector128((Byte*)(pinnedFirst)),
            Sse2.LoadVector128((Byte*)(pinnedSecond)));

        var destination = _dataTable.outArrayPtr;
        Unsafe.Write(destination, shuffled);
        ValidateResult(_fld1, _fld2, destination);
    }
}