public unsafe void Encrypt(ReadOnlySpan <byte> nonce, ReadOnlySpan <byte> source, Span <byte> destination, Span <byte> tag, ReadOnlySpan <byte> associatedData = default) { CheckInput(nonce, source, destination); ulong length = (ulong)source.Length << 3; Span <byte> counterBlock = _counterBlock.AsSpan(0, BlockSize); Span <byte> counter = counterBlock.Slice(12, 4); nonce.CopyTo(counterBlock); counter[0] = 0; counter[1] = 0; counter[2] = 0; counter[3] = 1; _crypto.Encrypt(counterBlock, tag); counter[3] = 2; uint c = 2; _gHash.Update(associatedData); while (!source.IsEmpty) { _crypto.Encrypt(counterBlock, _buffer); ++c; BinaryPrimitives.WriteUInt32BigEndian(counter, c); int n = Math.Min(source.Length, BlockSize); fixed(byte *pOut = destination) fixed(byte *pSource = source) fixed(byte *pBuffer = _buffer) { IntrinsicsUtils.Xor(pSource, pBuffer, pOut, n); } _gHash.Update(destination[..n]);
/// <summary>
/// Forwards the XOR of <paramref name="length"/> bytes to <c>IntrinsicsUtils.Xor</c>,
/// passing the three pointers through unchanged and in the same order.
/// </summary>
protected override unsafe void Xor(byte* stream, byte* source, byte* destination, int length)
    => IntrinsicsUtils.Xor(stream, source, destination, length);
/// <summary>
/// Initializes the Poly1305 state from a one-time key: the first 16 bytes are clamped and
/// split into five 26-bit limbs of r, the last 16 bytes are stored as four little-endian
/// words (<c>_x0</c>..<c>_x3</c>), and the SIMD lookup vectors used by the block routines
/// are precomputed.
/// </summary>
/// <param name="key">Key material; at least <c>KeySize</c> bytes are required.</param>
/// <exception cref="ArgumentException"><paramref name="key"/> is shorter than <c>KeySize</c>.</exception>
public Poly1305X86(ReadOnlySpan<byte> key)
{
    if (key.Length < KeySize)
    {
        throw new ArgumentException(@"Key length must be 32 bytes", nameof(key));
    }

    // Clamp r (r &= 0x0FFFFFFC0FFFFFFC0FFFFFFC0FFFFFFF) while cutting it into 26-bit limbs:
    // limb i starts at bit 26 * i, hence the byte offsets 0/3/6/9/12 and shifts 0/2/4/6/8.
    uint r0 = BinaryPrimitives.ReadUInt32LittleEndian(key) & 0x3FFFFFF;
    uint r1 = (BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(3)) >> 2) & 0x3FFFF03;
    uint r2 = (BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(6)) >> 4) & 0x3FFC0FF;
    uint r3 = (BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(9)) >> 6) & 0x3F03FFF;
    uint r4 = (BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(12)) >> 8) & 0x00FFFFF;

    // Second half of the key, kept as four little-endian 32-bit words.
    _x0 = BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(16));
    _x1 = BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(20));
    _x2 = BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(24));
    _x3 = BinaryPrimitives.ReadUInt32LittleEndian(key.Slice(28));

    // 5 * r_i, used where a product wraps past the top limb.
    uint r1x5 = r1 * 5;
    uint r2x5 = r2 * 5;
    uint r3x5 = r3 * 5;
    uint r4x5 = r4 * 5;

    // Paired limbs; all of these are assigned before the first MultiplyR call below.
    _r0 = Sse2.ConvertScalarToVector128UInt32(r0);
    _r0s4 = IntrinsicsUtils.CreateTwoUInt(r0, r4x5);
    _r1r0 = IntrinsicsUtils.CreateTwoUInt(r1, r0);
    _r2r1 = IntrinsicsUtils.CreateTwoUInt(r2, r1);
    _r3r2 = IntrinsicsUtils.CreateTwoUInt(r3, r2);
    _r4r3 = IntrinsicsUtils.CreateTwoUInt(r4, r3);
    _s1s2 = IntrinsicsUtils.CreateTwoUInt(r1x5, r2x5);
    _s3s2 = IntrinsicsUtils.CreateTwoUInt(r3x5, r2x5);
    _s3s4 = IntrinsicsUtils.CreateTwoUInt(r3x5, r4x5);
    _s4s3 = IntrinsicsUtils.CreateTwoUInt(r4x5, r3x5);

    // Start from r and apply MultiplyR once (presumably yielding r^2 - confirm against MultiplyR).
    uint sq0 = r0, sq1 = r1, sq2 = r2, sq3 = r3, sq4 = r4;
    MultiplyR(ref sq0, ref sq1, ref sq2, ref sq3, ref sq4);

    _ru0 = IntrinsicsUtils.CreateTwoUInt(sq0, r0);
    _ru1 = IntrinsicsUtils.CreateTwoUInt(sq1, r1);
    _ru2 = IntrinsicsUtils.CreateTwoUInt(sq2, r2);
    _ru3 = IntrinsicsUtils.CreateTwoUInt(sq3, r3);
    _ru4 = IntrinsicsUtils.CreateTwoUInt(sq4, r4);

    _sv1 = _ru1.Multiply5();
    _sv2 = _ru2.Multiply5();
    _sv3 = _ru3.Multiply5();
    _sv4 = _ru4.Multiply5();

    if (Avx2.IsSupported)
    {
        // Two further MultiplyR applications for the four-block AVX2 path.
        uint cu0 = sq0, cu1 = sq1, cu2 = sq2, cu3 = sq3, cu4 = sq4;
        MultiplyR(ref cu0, ref cu1, ref cu2, ref cu3, ref cu4);

        uint qu0 = cu0, qu1 = cu1, qu2 = cu2, qu3 = cu3, qu4 = cu4;
        MultiplyR(ref qu0, ref qu1, ref qu2, ref qu3, ref qu4);

        _ruwy0 = IntrinsicsUtils.Create4UInt(qu0, cu0, sq0, r0);
        _ruwy1 = IntrinsicsUtils.Create4UInt(qu1, cu1, sq1, r1);
        _ruwy2 = IntrinsicsUtils.Create4UInt(qu2, cu2, sq2, r2);
        _ruwy3 = IntrinsicsUtils.Create4UInt(qu3, cu3, sq3, r3);
        _ruwy4 = IntrinsicsUtils.Create4UInt(qu4, cu4, sq4, r4);

        _svxz1 = _ruwy1.Multiply5();
        _svxz2 = _ruwy2.Multiply5();
        _svxz3 = _ruwy3.Multiply5();
        _svxz4 = _ruwy4.Multiply5();
    }
}
/// <summary>
/// Absorbs four consecutive 16-byte blocks (64 bytes of <paramref name="m"/>) into the
/// accumulator limbs <c>_h0</c>..<c>_h4</c> using AVX2.
/// </summary>
/// <remarks>
/// Each vector holds the same 26-bit limb of the four blocks; the current accumulator is
/// added to element 0 only (via <c>Vector256.CreateScalar</c>). The limbs are then multiplied
/// by the precomputed <c>_ruwy*</c> / <c>_svxz*</c> key vectors and summed across lanes.
/// </remarks>
/// <param name="m">Input; the code reads bytes 0..63.</param>
private void Block4(ReadOnlySpan<byte> m)
{
    const uint LimbMask = 0x3ffffff;

    // Limb i of each block starts at bit 26 * i: byte offset 3 * i, bit shift 2 * i.
    // Word indices 0/4/8/12 step one 16-byte block at a time.
    var w0 = MemoryMarshal.Cast<byte, uint>(m);
    var limb0 = Avx2.And(IntrinsicsUtils.Create4UInt(w0[0], w0[4], w0[8], w0[12]), And256);
    limb0 = Avx2.Add(limb0, Vector256.CreateScalar(_h0));

    var w1 = MemoryMarshal.Cast<byte, uint>(m.Slice(3));
    var limb1 = Avx2.ShiftRightLogical(IntrinsicsUtils.Create4UInt(w1[0], w1[4], w1[8], w1[12]), 2);
    limb1 = Avx2.Add(Avx2.And(limb1, And256), Vector256.CreateScalar(_h1));

    var w2 = MemoryMarshal.Cast<byte, uint>(m.Slice(6));
    var limb2 = Avx2.ShiftRightLogical(IntrinsicsUtils.Create4UInt(w2[0], w2[4], w2[8], w2[12]), 4);
    limb2 = Avx2.Add(Avx2.And(limb2, And256), Vector256.CreateScalar(_h2));

    var w3 = MemoryMarshal.Cast<byte, uint>(m.Slice(9));
    var limb3 = Avx2.ShiftRightLogical(IntrinsicsUtils.Create4UInt(w3[0], w3[4], w3[8], w3[12]), 6);
    limb3 = Avx2.Add(Avx2.And(limb3, And256), Vector256.CreateScalar(_h3));

    // The top limb is not masked; instead the Or256 constant is OR-ed in.
    var w4 = MemoryMarshal.Cast<byte, uint>(m.Slice(12));
    var limb4 = Avx2.ShiftRightLogical(IntrinsicsUtils.Create4UInt(w4[0], w4[4], w4[8], w4[12]), 8);
    limb4 = Avx2.Add(Avx2.Or(limb4, Or256), Vector256.CreateScalar(_h4));

    // Schoolbook limb products; terms that wrap past limb 4 use the 5x vectors (_svxz*).
    var acc0 = Avx2.Multiply(_ruwy0, limb0);
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz4, limb1));
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz3, limb2));
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz2, limb3));
    acc0 = Avx2.Add(acc0, Avx2.Multiply(_svxz1, limb4));

    var acc1 = Avx2.Multiply(_ruwy1, limb0);
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_ruwy0, limb1));
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_svxz4, limb2));
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_svxz3, limb3));
    acc1 = Avx2.Add(acc1, Avx2.Multiply(_svxz2, limb4));

    var acc2 = Avx2.Multiply(_ruwy2, limb0);
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_ruwy1, limb1));
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_ruwy0, limb2));
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_svxz4, limb3));
    acc2 = Avx2.Add(acc2, Avx2.Multiply(_svxz3, limb4));

    var acc3 = Avx2.Multiply(_ruwy3, limb0);
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_ruwy2, limb1));
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_ruwy1, limb2));
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_ruwy0, limb3));
    acc3 = Avx2.Add(acc3, Avx2.Multiply(_svxz4, limb4));

    var acc4 = Avx2.Multiply(_ruwy4, limb0);
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy3, limb1));
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy2, limb2));
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy1, limb3));
    acc4 = Avx2.Add(acc4, Avx2.Multiply(_ruwy0, limb4));

    // Collapse the four lanes of every accumulator into one 64-bit sum.
    var sum0 = acc0.Add4UInt64();
    var sum1 = acc1.Add4UInt64();
    var sum2 = acc2.Add4UInt64();
    var sum3 = acc3.Add4UInt64();
    var sum4 = acc4.Add4UInt64();

    // Ripple the carries upward, then keep the low 26 bits of every limb.
    sum1 += sum0 >> 26;
    sum2 += sum1 >> 26;
    sum3 += sum2 >> 26;
    sum4 += sum3 >> 26;

    _h0 = (uint)sum0 & LimbMask;
    _h1 = (uint)sum1 & LimbMask;
    _h2 = (uint)sum2 & LimbMask;
    _h3 = (uint)sum3 & LimbMask;
    _h4 = (uint)sum4 & LimbMask;

    // The carry out of the top limb re-enters at the bottom multiplied by 5
    // (2^130 is congruent to 5 modulo 2^130 - 5).
    _h0 += (uint)((sum4 >> 26) * 5);
    _h1 += _h0 >> 26;
    _h0 &= LimbMask;
}
/// <summary>
/// Absorbs two consecutive 16-byte blocks (32 bytes of <paramref name="m"/>) into the
/// accumulator limbs <c>_h0</c>..<c>_h4</c> using SSE2.
/// </summary>
/// <remarks>
/// Each vector holds the same 26-bit limb of both blocks; the current accumulator is added
/// to the first element only (via <c>Sse2.ConvertScalarToVector128UInt32</c>). The limbs are
/// multiplied by the precomputed <c>_ru*</c> / <c>_sv*</c> key vectors and the two lanes are
/// summed. The top limb's constant is combined with a bitwise OR, matching <c>Block4</c>;
/// the previous XOR only gave the same result because the 8-bit right shift clears the
/// upper byte of every 32-bit element.
/// </remarks>
/// <param name="m">Input; the code reads bytes 0..31.</param>
private void Block2(ReadOnlySpan<byte> m)
{
    // Limb i of each block starts at bit 26 * i: byte offset 3 * i, bit shift 2 * i.
    // Word indices 0 and 4 are one 16-byte block apart.
    var n0 = MemoryMarshal.Cast<byte, uint>(m);
    var hc0 = IntrinsicsUtils.CreateTwoUInt(n0[0], n0[4]);
    hc0 = Sse2.And(hc0, And128);
    hc0 = Sse2.Add(hc0, Sse2.ConvertScalarToVector128UInt32(_h0));

    var n1 = MemoryMarshal.Cast<byte, uint>(m.Slice(3));
    var hc1 = IntrinsicsUtils.CreateTwoUInt(n1[0], n1[4]);
    hc1 = Sse2.ShiftRightLogical(hc1, 2);
    hc1 = Sse2.And(hc1, And128);
    hc1 = Sse2.Add(hc1, Sse2.ConvertScalarToVector128UInt32(_h1));

    var n2 = MemoryMarshal.Cast<byte, uint>(m.Slice(6));
    var hc2 = IntrinsicsUtils.CreateTwoUInt(n2[0], n2[4]);
    hc2 = Sse2.ShiftRightLogical(hc2, 4);
    hc2 = Sse2.And(hc2, And128);
    hc2 = Sse2.Add(hc2, Sse2.ConvertScalarToVector128UInt32(_h2));

    var n3 = MemoryMarshal.Cast<byte, uint>(m.Slice(9));
    var hc3 = IntrinsicsUtils.CreateTwoUInt(n3[0], n3[4]);
    hc3 = Sse2.ShiftRightLogical(hc3, 6);
    hc3 = Sse2.And(hc3, And128);
    hc3 = Sse2.Add(hc3, Sse2.ConvertScalarToVector128UInt32(_h3));

    var n4 = MemoryMarshal.Cast<byte, uint>(m.Slice(12));
    var hc4 = IntrinsicsUtils.CreateTwoUInt(n4[0], n4[4]);
    hc4 = Sse2.ShiftRightLogical(hc4, 8);
    // Set (not toggle) the constant's bits on the top limb, as Block4 does with Or256.
    hc4 = Sse2.Or(hc4, Or128);
    hc4 = Sse2.Add(hc4, Sse2.ConvertScalarToVector128UInt32(_h4));

    // Schoolbook limb products; terms that wrap past limb 4 use the 5x vectors (_sv*).
    var t1 = Sse2.Multiply(_ru0, hc0);
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv4, hc1));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv3, hc2));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv2, hc3));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv1, hc4));
    var d0 = t1.Add2UInt64();

    t1 = Sse2.Multiply(_ru1, hc0);
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru0, hc1));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv4, hc2));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv3, hc3));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv2, hc4));
    var d1 = t1.Add2UInt64();

    t1 = Sse2.Multiply(_ru2, hc0);
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru1, hc1));
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru0, hc2));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv4, hc3));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv3, hc4));
    var d2 = t1.Add2UInt64();

    t1 = Sse2.Multiply(_ru3, hc0);
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru2, hc1));
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru1, hc2));
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru0, hc3));
    t1 = Sse2.Add(t1, Sse2.Multiply(_sv4, hc4));
    var d3 = t1.Add2UInt64();

    t1 = Sse2.Multiply(_ru4, hc0);
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru3, hc1));
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru2, hc2));
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru1, hc3));
    t1 = Sse2.Add(t1, Sse2.Multiply(_ru0, hc4));
    var d4 = t1.Add2UInt64();

    // Carry propagation: keep 26 bits per limb and push the excess into the next one.
    _h0 = (uint)d0 & 0x3ffffff;
    d1 += d0 >> 26;
    _h1 = (uint)d1 & 0x3ffffff;
    d2 += d1 >> 26;
    _h2 = (uint)d2 & 0x3ffffff;
    d3 += d2 >> 26;
    _h3 = (uint)d3 & 0x3ffffff;
    d4 += d3 >> 26;
    _h4 = (uint)d4 & 0x3ffffff;

    // The carry out of the top limb re-enters at the bottom multiplied by 5
    // (2^130 is congruent to 5 modulo 2^130 - 5).
    _h0 += (uint)((d4 >> 26) * 5);
    _h1 += _h0 >> 26;
    _h0 &= 0x3ffffff;
}
/// <summary>
/// Encrypts <paramref name="source"/> into <paramref name="destination"/> in counter mode,
/// processing up to <c>BlockSize4</c> bytes of keystream per iteration, and writes the
/// authentication tag computed over <paramref name="associatedData"/> and the ciphertext
/// into <paramref name="tag"/>.
/// </summary>
/// <param name="nonce">Nonce; must be exactly <c>NonceSize</c> bytes.</param>
/// <param name="source">Plaintext.</param>
/// <param name="destination">Receives the ciphertext; must be the same length as <paramref name="source"/>.</param>
/// <param name="tag">Receives the tag; must hold at least <c>TagSize</c> bytes.</param>
/// <param name="associatedData">Additional data that is authenticated but not encrypted.</param>
/// <exception cref="ArgumentException">
/// The nonce has the wrong size, <paramref name="destination"/> and <paramref name="source"/>
/// differ in length, or <paramref name="tag"/> is shorter than <c>TagSize</c>.
/// </exception>
public unsafe void Encrypt(ReadOnlySpan<byte> nonce, ReadOnlySpan<byte> source, Span<byte> destination, Span<byte> tag, ReadOnlySpan<byte> associatedData = default)
{
    if (nonce.Length != NonceSize)
    {
        throw new ArgumentException(@"Nonce size must be 12 bytes", nameof(nonce));
    }

    if (destination.Length != source.Length)
    {
        throw new ArgumentException(string.Empty, nameof(destination));
    }

    // The tag is finalized through a raw pointer (Xor16) with no bounds checking,
    // so a short span must be rejected here rather than written past.
    if (tag.Length < TagSize)
    {
        throw new ArgumentException(@"Tag buffer is too small", nameof(tag));
    }

    // Bit length of the plaintext, captured before the spans are advanced below.
    var length = (ulong)source.Length << 3;

    // Four 16-byte counter blocks side by side: 12-byte nonce + 4-byte big-endian counter each.
    var counterBlock = _counterBlock.AsSpan(0, BlockSize4);
    var counter0 = counterBlock.Slice(12, 4);
    var counter1 = counterBlock.Slice(28, 4);
    var counter2 = counterBlock.Slice(44, 4);
    var counter3 = counterBlock.Slice(60, 4);

    nonce.CopyTo(counterBlock);
    nonce.CopyTo(counterBlock.Slice(16));
    nonce.CopyTo(counterBlock.Slice(32));
    nonce.CopyTo(counterBlock.Slice(48));

    counter0[0] = 0;
    counter0[1] = 0;
    counter0[2] = 0;
    counter0[3] = 1;

    counter1[0] = 0;
    counter1[1] = 0;
    counter1[2] = 0;
    counter1[3] = 3;

    counter2[0] = 0;
    counter2[1] = 0;
    counter2[2] = 0;
    counter2[3] = 4;

    counter3[0] = 0;
    counter3[1] = 0;
    counter3[2] = 0;
    counter3[3] = 5;

    // Counter value 1 is encrypted straight into the tag buffer; it is combined with
    // the MAC at the end. Afterwards the four lanes hold counters 2, 3, 4, 5.
    _crypto.Encrypt(counterBlock, tag);
    counter0[3] = 2;

    _gHash.Update(associatedData);

    while (!source.IsEmpty)
    {
        _crypto.Encrypt4(counterBlock, _buffer);

        // Presumably advances each big-endian counter by 4 for the next batch - confirm against IncrementBe4.
        counter0.IncrementBe4();
        counter1.IncrementBe4();
        counter2.IncrementBe4();
        counter3.IncrementBe4();

        var n = Math.Min(source.Length, BlockSize4);

        fixed (byte* pOut = destination)
        fixed (byte* pSource = source)
        fixed (byte* pBuffer = _buffer)
        {
            IntrinsicsUtils.Xor(pSource, pBuffer, pOut, n);
        }

        // The hash is fed the ciphertext that was just produced.
        _gHash.Update(destination.Slice(0, n));

        source = source.Slice(n);
        destination = destination.Slice(n);
    }

    // Final hash block: bit lengths of the associated data and the plaintext, big-endian.
    BinaryPrimitives.WriteUInt64BigEndian(_buffer, (ulong)associatedData.Length << 3);
    BinaryPrimitives.WriteUInt64BigEndian(_buffer.AsSpan(8), length);

    _gHash.Update(_buffer.AsSpan(0, TagSize));
    _gHash.GetMac(_buffer);

    // tag currently holds the encrypted first counter block; XOR the MAC into it in place.
    fixed (byte* pTag = tag)
    fixed (byte* pBuffer = _buffer)
    {
        IntrinsicsUtils.Xor16(pTag, pBuffer, pTag);
    }
}