        /// <summary>
        /// XORs <paramref name="messageLength"/> bytes of <paramref name="message"/> with a keystream
        /// generated by 20-round Salsa over the state built by <c>PrepareInternalKey</c>, and writes
        /// the result to <paramref name="ciphertext"/>.
        /// </summary>
        /// <param name="ciphertext">Destination buffer.</param>
        /// <param name="ciphertextOffset">Index in <paramref name="ciphertext"/> of the first output byte.</param>
        /// <param name="message">Source buffer.</param>
        /// <param name="messageOffset">Index in <paramref name="message"/> of the first input byte.</param>
        /// <param name="messageLength">Number of bytes to process.</param>
        /// <param name="key">Key buffer; 32 bytes are read starting at <paramref name="keyOffset"/>.</param>
        /// <param name="keyOffset">Index of the first key byte.</param>
        /// <param name="nonce">Nonce buffer; 24 bytes are read starting at <paramref name="nonceOffset"/>.</param>
        /// <param name="nonceOffset">Index of the first nonce byte.</param>
        private static void ProcessInternal(byte[] ciphertext, int ciphertextOffset, byte[] message, int messageOffset, int messageLength, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
        {
            Array16<UInt32> internalKey;
            PrepareInternalKey(out internalKey, key, keyOffset, nonce, nonceOffset);

            Array16<UInt32> temp;
            byte[] tempBytes = new byte[64];//todo: remove allocation

            try
            {
                int blockOffset = 0;
                while (blockOffset < messageLength)
                {
                    SalsaCore.Salsa(out temp, ref internalKey, 20);
                    ByteIntegerConverter.Array16StoreLittleEndian32(tempBytes, 0, ref temp);

                    int count = Math.Min(64, messageLength - blockOffset);
                    for (int i = 0; i < count; i++)
                    {
                        ciphertext[ciphertextOffset + blockOffset + i] = (byte)(message[messageOffset + blockOffset + i] ^ tempBytes[i]);
                    }

                    // Advance the block counter so the next 64 bytes get a fresh keystream block.
                    // Without this every block would be XORed with the same keystream.
                    // An int-sized message spans fewer than 2^32 blocks, so x8 cannot wrap here.
                    // NOTE(review): assumes SalsaCore.Salsa leaves its input state untouched - confirm.
                    internalKey.x8++;

                    // Stepping by count (64 for every block but the last) ends exactly at
                    // messageLength and cannot overflow int.
                    blockOffset += count;
                }
            }
            finally
            {
                // The scratch buffer holds raw keystream; clear it even when indexing throws.
                CryptoBytes.Wipe(tempBytes); // DON'T LEAK!
            }
        }
        /// <summary>
        /// Recomputes the Poly1305 tag of <paramref name="message"/> under <paramref name="key"/>
        /// and compares it with <paramref name="signature"/> via <c>CryptoBytes.ConstantTimeEquals</c>.
        /// </summary>
        /// <param name="signature">Tag to check; must be 16 bytes.</param>
        /// <param name="message">Authenticated data.</param>
        /// <param name="key">Key; must be 32 bytes.</param>
        /// <returns>The result of the comparison between the recomputed tag and <paramref name="signature"/>.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        /// <exception cref="ArgumentException">Signature or key has the wrong length.</exception>
        public override bool Verify(byte[] signature, byte[] message, byte[] key)
        {
            if (signature == null)
            {
                throw new ArgumentNullException("signature");
            }
            if (message == null)
            {
                throw new ArgumentNullException("message");
            }
            if (key == null)
            {
                throw new ArgumentNullException("key");
            }
            if (signature.Length != 16)
            {
                throw new ArgumentException("Invalid signature size", "signature");
            }
            if (key.Length != 32)
            {
                throw new ArgumentException("Invalid key size", "key");
            }

            Array8<uint> keyWords;
            ByteIntegerConverter.Array8LoadLittleEndian32(out keyWords, key, 0);

            // TODO: remove allocation
            byte[] expectedTag = new byte[16];
            Poly1305Donna.poly1305_auth(expectedTag, 0, message, 0, message.Length, ref keyWords);

            bool tagsMatch = CryptoBytes.ConstantTimeEquals(expectedTag, signature);
            return tagsMatch;
        }
        /// <summary>
        /// Completes the hash: feeds the padding through <c>Update</c>, runs the final compression
        /// step(s), writes the eight 64-bit state words big-endian into <paramref name="output"/>
        /// and then clears the internal state.
        /// </summary>
        /// <param name="output">Destination segment; must be exactly 64 bytes long.</param>
        /// <exception cref="ArgumentNullException"><c>output.Array</c> is null.</exception>
        /// <exception cref="ArgumentException"><c>output.Count</c> is not 64.</exception>
        public void Finish(ArraySegment<byte> output)
        {
            if (output.Array == null)
            {
                throw new ArgumentNullException(nameof(output));
            }
            if (output.Count != 64)
            {
                throw new ArgumentException("output.Count must be 64");
            }

            Update(Padding, 0, Padding.Length);

            Array16<ulong> lastBlock;
            ByteIntegerConverter.Array16LoadBigEndian64(out lastBlock, _buffer, 0);
            CryptoBytes.InternalWipe(_buffer, 0, _buffer.Length);

            int pending = (int)_totalBytes & (BlockSize - 1);
            if (pending > BlockSize - 16)
            {
                // No room left for the length field: compress this block and continue with an empty one.
                Sha512Internal.Core(out _state, ref _state, ref lastBlock);
                lastBlock = default(Array16<ulong>);
            }

            // Length in bits goes into the last word.
            // NOTE(review): the "- 1" presumably discounts a one-byte Padding counted by Update - confirm.
            lastBlock.x15 = (_totalBytes - 1) * 8;
            Sha512Internal.Core(out _state, ref _state, ref lastBlock);

            byte[] target = output.Array;
            int start = output.Offset;
            ByteIntegerConverter.StoreBigEndian64(target, start + 0, _state.x0);
            ByteIntegerConverter.StoreBigEndian64(target, start + 8, _state.x1);
            ByteIntegerConverter.StoreBigEndian64(target, start + 16, _state.x2);
            ByteIntegerConverter.StoreBigEndian64(target, start + 24, _state.x3);
            ByteIntegerConverter.StoreBigEndian64(target, start + 32, _state.x4);
            ByteIntegerConverter.StoreBigEndian64(target, start + 40, _state.x5);
            ByteIntegerConverter.StoreBigEndian64(target, start + 48, _state.x6);
            ByteIntegerConverter.StoreBigEndian64(target, start + 56, _state.x7);

            _state = default(Array8<ulong>);
        }
        /// <summary>
        /// Finalizes SHA-512 hashing: feeds the padding through <c>Update</c>, runs the final
        /// compression step(s), writes the eight 64-bit state words big-endian into
        /// <paramref name="output"/> and clears the internal state.
        /// </summary>
        /// <param name="output">
        /// Output buffer; must be exactly 64 bytes long.
        /// </param>
        /// <exception cref="ArgumentNullException"><c>output.Array</c> is null.</exception>
        /// <exception cref="ArgumentException"><c>output.Count</c> is not 64.</exception>
        public void Finalize(ArraySegment<byte> output)
        {
            // Validate before touching any state: a bad destination must not leave the hash
            // half-finalized, and a short segment must not be written past its end.
            if (output.Array == null)
            {
                throw new ArgumentNullException(nameof(output));
            }
            if (output.Count != 64)
            {
                throw new ArgumentException("output.Count must be 64", nameof(output));
            }

            Update(Padding, 0, Padding.Length);
            Array16<ulong> block;

            ByteIntegerConverter.Array16LoadBigEndian64(out block, _buffer, 0);
            CryptoBytes.InternalWipe(_buffer, 0, _buffer.Length);
            var bytesInBuffer = (int)_totalBytes & (BlockSize - 1);

            if (bytesInBuffer > BlockSize - 16)
            {
                // No room left for the length field: compress this block and continue with an empty one.
                Sha512Internal.Core(out _state, ref _state, ref block);
                block = default;
            }

            // Length in bits goes into the last word.
            // NOTE(review): the "- 1" presumably discounts a one-byte Padding counted by Update - confirm.
            block.X15 = (_totalBytes - 1) * 8;
            Sha512Internal.Core(out _state, ref _state, ref block);

            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 0, _state.X0);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 8, _state.X1);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 16, _state.X2);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 24, _state.X3);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 32, _state.X4);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 40, _state.X5);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 48, _state.X6);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 56, _state.X7);
            _state = default;
        }
        /// <summary>
        /// Finalizes SHA-512 hashing: feeds the padding through <c>Update</c>, runs the final
        /// compression step(s), writes the eight 64-bit state words big-endian into
        /// <paramref name="output"/> and clears the internal state.
        /// </summary>
        /// <param name="output">Output buffer; must be exactly 64 bytes long.</param>
        /// <exception cref="ArgumentNullException"><c>output.Array</c> is null.</exception>
        /// <exception cref="ArgumentException"><c>output.Count</c> is not 64.</exception>
        public void Finalize(ArraySegment<byte> output)
        {
            // Explicit guards instead of Contract.Requires<TException>: that API only enforces the
            // precondition when the assembly is post-processed by the Code Contracts rewriter.
            // The exception types are the ones the contracts declared.
            if (output.Array == null)
            {
                throw new ArgumentNullException("output");
            }
            if (output.Count != 64)
            {
                throw new ArgumentException("output.Count must be 64", "output");
            }

            Update(_padding, 0, _padding.Length);
            Array16<ulong> block;

            ByteIntegerConverter.Array16LoadBigEndian64(out block, _buffer, 0);
            CryptoBytes.InternalWipe(_buffer, 0, _buffer.Length);
            int bytesInBuffer = (int)_totalBytes & (BlockSize - 1);

            if (bytesInBuffer > BlockSize - 16)
            {
                // No room left for the length field: compress this block and continue with an empty one.
                Sha512Internal.Core(out _state, ref _state, ref block);
                block = default(Array16<ulong>);
            }

            // Length in bits goes into the last word.
            // NOTE(review): the "- 1" presumably discounts a one-byte _padding counted by Update - confirm.
            block.x15 = (_totalBytes - 1) * 8;
            Sha512Internal.Core(out _state, ref _state, ref block);

            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 0, _state.x0);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 8, _state.x1);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 16, _state.x2);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 24, _state.x3);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 32, _state.x4);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 40, _state.x5);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 48, _state.x6);
            ByteIntegerConverter.StoreBigEndian64(output.Array, output.Offset + 56, _state.x7);
            _state = default(Array8<ulong>);
        }
        /// <summary>
        /// Applies the 20-round Salsa core one million times, feeding each output back in as the
        /// next input, and compares the final state with a fixed expected vector.
        /// </summary>
        public void Salsa20_1000000()
        {
            const int Iterations = 1000000;

            var input = new byte[]
            {
                6, 124, 83, 146, 38, 191, 9, 50, 4, 161, 47, 222, 122, 182, 223, 185,
                75, 27, 0, 216, 16, 122, 7, 89, 162, 104, 101, 147, 213, 21, 54, 95,
                225, 253, 139, 176, 105, 132, 23, 116, 76, 41, 176, 207, 221, 34, 157, 108,
                94, 94, 99, 52, 90, 117, 91, 220, 146, 190, 239, 143, 196, 176, 130, 186
            };
            var expectedOutput = new byte[]
            {
                8, 18, 38, 199, 119, 76, 215, 67, 173, 127, 144, 162, 103, 212, 176, 217,
                192, 19, 233, 33, 159, 197, 154, 160, 128, 243, 219, 65, 171, 136, 135, 225,
                123, 11, 68, 86, 237, 82, 20, 155, 133, 189, 9, 83, 167, 116, 194, 78,
                122, 127, 195, 185, 185, 204, 188, 90, 245, 9, 183, 248, 226, 85, 245, 104
            };

            Array16<UInt32> state;
            ByteIntegerConverter.Array16LoadLittleEndian32(out state, input, 0);

            int remaining = Iterations;
            while (remaining > 0)
            {
                SalsaCore.Salsa(out state, ref state, 20);
                remaining--;
            }

            var actualOutput = new byte[64];
            ByteIntegerConverter.Array16StoreLittleEndian32(actualOutput, 0, ref state);

            TestHelpers.AssertEqualBytes(expectedOutput, actualOutput);
        }
        /// <summary>
        /// Computes the Poly1305 tag of <paramref name="message"/> under <paramref name="key"/>
        /// and writes it into <paramref name="signature"/>.
        /// </summary>
        /// <param name="signature">Destination segment; must be 16 bytes.</param>
        /// <param name="message">Data to authenticate.</param>
        /// <param name="key">Key segment; must be 32 bytes.</param>
        /// <exception cref="ArgumentNullException">A segment has no backing array.</exception>
        /// <exception cref="ArgumentException">Key or signature segment has the wrong size.</exception>
        public override void Sign(ArraySegment<byte> signature, ArraySegment<byte> message, ArraySegment<byte> key)
        {
            if (signature.Array == null)
            {
                throw new ArgumentNullException("signature.Array");
            }
            if (message.Array == null)
            {
                throw new ArgumentNullException("message.Array");
            }
            if (key.Array == null)
            {
                throw new ArgumentNullException("key.Array");
            }
            if (key.Count != 32)
            {
                throw new ArgumentException("Invalid key size", "key");
            }
            if (signature.Count != 16)
            {
                throw new ArgumentException("Invalid signature size", "signature");
            }

            Array8<UInt32> keyWords;
            ByteIntegerConverter.Array8LoadLittleEndian32(out keyWords, key.Array, key.Offset);

            byte[] tagBuffer = signature.Array;
            int tagStart = signature.Offset;
            Poly1305Donna.poly1305_auth(tagBuffer, tagStart, message.Array, message.Offset, message.Count, ref keyWords);
        }
        /// <summary>
        /// Builds a 16-word state from the four Salsa constants, 32 key bytes and 16 nonce bytes,
        /// runs 20-round <c>SalsaCore.HSalsa</c> over it and writes words 0, 5, 10, 15, 6, 7, 8, 9
        /// of the result (32 bytes, little-endian) to <paramref name="output"/>.
        /// </summary>
        /// <param name="output">Destination buffer; 32 bytes are written.</param>
        /// <param name="outputOffset">Index of the first byte written.</param>
        /// <param name="key">Key buffer; 32 bytes are read.</param>
        /// <param name="keyOffset">Index of the first key byte.</param>
        /// <param name="nonce">Nonce buffer; 16 bytes are read.</param>
        /// <param name="nonceOffset">Index of the first nonce byte.</param>
        public static void HSalsa20(byte[] output, int outputOffset, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
        {
            var state = new Array16<UInt32>
            {
                x0 = SalsaConst0,
                x1 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 0),
                x2 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 4),
                x3 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 8),
                x4 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 12),
                x5 = SalsaConst1,
                x6 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 0),
                x7 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 4),
                x8 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 8),
                x9 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 12),
                x10 = SalsaConst2,
                x11 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 16),
                x12 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 20),
                x13 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 24),
                x14 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 28),
                x15 = SalsaConst3
            };

            SalsaCore.HSalsa(out state, ref state, 20);

            // Output order: the four "constant" positions first, then the four "nonce" positions.
            int at = outputOffset;
            ByteIntegerConverter.StoreLittleEndian32(output, at + 0, state.x0);
            ByteIntegerConverter.StoreLittleEndian32(output, at + 4, state.x5);
            ByteIntegerConverter.StoreLittleEndian32(output, at + 8, state.x10);
            ByteIntegerConverter.StoreLittleEndian32(output, at + 12, state.x15);
            ByteIntegerConverter.StoreLittleEndian32(output, at + 16, state.x6);
            ByteIntegerConverter.StoreLittleEndian32(output, at + 20, state.x7);
            ByteIntegerConverter.StoreLittleEndian32(output, at + 24, state.x8);
            ByteIntegerConverter.StoreLittleEndian32(output, at + 28, state.x9);
        }
        /// <summary>
        /// Absorbs <paramref name="count"/> bytes of <paramref name="data"/>, starting at
        /// <paramref name="offset"/>, into the hash state. Full blocks are compressed immediately;
        /// a trailing partial block is kept in the internal buffer.
        /// </summary>
        /// <param name="data">Source array.</param>
        /// <param name="offset">Index of the first byte to absorb.</param>
        /// <param name="count">Number of bytes to absorb.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Offset or count is negative.</exception>
        /// <exception cref="ArgumentException">The range exceeds the array.</exception>
        /// <exception cref="InvalidOperationException">The running byte total reaches <c>ulong.MaxValue / 8</c>.</exception>
        public void Update(byte[] data, int offset, int count)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }
            if (offset < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(offset));
            }
            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count));
            }
            if (data.Length - offset < count)
            {
                throw new ArgumentException("Requires offset + count <= data.Length");
            }

            // Position inside the current block, taken before the total is advanced.
            int buffered = (int)_totalBytes & (BlockSize - 1);

            _totalBytes += (uint)count;
            if (_totalBytes >= ulong.MaxValue / 8)
            {
                throw new InvalidOperationException("Too much data");
            }

            Array16<ulong> chunk;

            // Top up a partially filled buffer first.
            if (buffered != 0)
            {
                int fill = Math.Min(BlockSize - buffered, count);
                Buffer.BlockCopy(data, offset, _buffer, buffered, fill);
                offset += fill;
                count -= fill;
                buffered += fill;

                if (buffered == BlockSize)
                {
                    ByteIntegerConverter.Array16LoadBigEndian64(out chunk, _buffer, 0);
                    Sha512Internal.Core(out _state, ref _state, ref chunk);
                    CryptoBytes.InternalWipe(_buffer, 0, _buffer.Length);
                    buffered = 0;
                }
            }

            // Compress whole blocks straight from the caller's array.
            for (; count >= BlockSize; offset += BlockSize, count -= BlockSize)
            {
                ByteIntegerConverter.Array16LoadBigEndian64(out chunk, data, offset);
                Sha512Internal.Core(out _state, ref _state, ref chunk);
            }

            // Keep whatever is left for the next call.
            if (count > 0)
            {
                Buffer.BlockCopy(data, offset, _buffer, buffered, count);
            }
        }
        /// <summary>
        /// Recomputes the Poly1305 tag of <paramref name="message"/> under <paramref name="key"/>
        /// and compares it with <paramref name="signature"/> via <c>CryptoBytes.ConstantTimeEquals</c>.
        /// </summary>
        /// <param name="signature">Tag segment to check; must be 16 bytes.</param>
        /// <param name="message">Authenticated data.</param>
        /// <param name="key">Key segment; must be 32 bytes.</param>
        /// <returns>The result of the comparison between the recomputed tag and <paramref name="signature"/>.</returns>
        /// <exception cref="ArgumentNullException">A segment has no backing array.</exception>
        /// <exception cref="ArgumentException">Key or signature segment has the wrong size.</exception>
        public override bool Verify(ArraySegment<byte> signature, ArraySegment<byte> message, ArraySegment<byte> key)
        {
            if (signature.Array == null)
            {
                throw new ArgumentNullException("signature.Array");
            }
            if (message.Array == null)
            {
                throw new ArgumentNullException("message.Array");
            }
            if (key.Array == null)
            {
                throw new ArgumentNullException("key.Array");
            }
            if (key.Count != 32)
            {
                throw new ArgumentException("Invalid key size", "key");
            }
            if (signature.Count != 16)
            {
                throw new ArgumentException("Invalid signature size", "signature");
            }

            Array8<uint> keyWords;
            ByteIntegerConverter.Array8LoadLittleEndian32(out keyWords, key.Array, key.Offset);

            // TODO: remove allocation
            byte[] expectedTag = new byte[16];
            Poly1305Donna.poly1305_auth(expectedTag, 0, message.Array, message.Offset, message.Count, ref keyWords);

            var expectedSegment = new ArraySegment<byte>(expectedTag);
            return CryptoBytes.ConstantTimeEquals(expectedSegment, signature);
        }
        /// <summary>
        /// Encrypts <paramref name="message"/> and authenticates the result. The first 16 bytes at
        /// <paramref name="ciphertextOffset"/> receive the Poly1305 tag; the encrypted message
        /// follows at <c>ciphertextOffset + 16</c>.
        /// </summary>
        /// <remarks>
        /// The first Salsa block is split: words 0-7 become the Poly1305 key and words 8-15 encrypt
        /// the first 32 message bytes. Every further 64 message bytes use one full block, with the
        /// block counter advanced before each.
        /// </remarks>
        /// <param name="ciphertext">Destination buffer; <c>16 + messageLength</c> bytes are written.</param>
        /// <param name="ciphertextOffset">Index of the first byte written (the tag).</param>
        /// <param name="message">Plaintext buffer.</param>
        /// <param name="messageOffset">Index of the first plaintext byte.</param>
        /// <param name="messageLength">Number of plaintext bytes.</param>
        /// <param name="key">Key buffer; 32 bytes are read starting at <paramref name="keyOffset"/>.</param>
        /// <param name="keyOffset">Index of the first key byte.</param>
        /// <param name="nonce">Nonce buffer; 24 bytes are read starting at <paramref name="nonceOffset"/>.</param>
        /// <param name="nonceOffset">Index of the first nonce byte.</param>
        private static void EncryptInternal(byte[] ciphertext, int ciphertextOffset, byte[] message, int messageOffset, int messageLength, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
        {
            Array16<UInt32> internalKey;
            PrepareInternalKey(out internalKey, key, keyOffset, nonce, nonceOffset);

            Array16<UInt32> temp;
            var tempBytes = new byte[64];//todo: remove allocation
            Array8<UInt32> poly1305Key;

            try
            {
                // first iteration
                SalsaCore.Salsa(out temp, ref internalKey, 20);

                // first half is for Poly1305
                poly1305Key.x0 = temp.x0;
                poly1305Key.x1 = temp.x1;
                poly1305Key.x2 = temp.x2;
                poly1305Key.x3 = temp.x3;
                poly1305Key.x4 = temp.x4;
                poly1305Key.x5 = temp.x5;
                poly1305Key.x6 = temp.x6;
                poly1305Key.x7 = temp.x7;

                // second half for the message
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 0, temp.x8);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 4, temp.x9);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 8, temp.x10);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 12, temp.x11);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 16, temp.x12);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 20, temp.x13);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 24, temp.x14);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 28, temp.x15);

                int firstCount = Math.Min(32, messageLength);
                for (int i = 0; i < firstCount; i++)
                {
                    ciphertext[16 + ciphertextOffset + i] = (byte)(message[messageOffset + i] ^ tempBytes[i]);
                }

                // later iterations
                int blockOffset = 32;
                while (blockOffset < messageLength)
                {
                    // Advance the block counter so each 64-byte block gets a fresh keystream block.
                    // Without this, every later block would reuse the first block's keystream.
                    // An int-sized message spans fewer than 2^32 blocks, so x8 cannot wrap here.
                    // NOTE(review): assumes SalsaCore.Salsa leaves its input state untouched - confirm.
                    internalKey.x8++;

                    SalsaCore.Salsa(out temp, ref internalKey, 20);
                    ByteIntegerConverter.Array16StoreLittleEndian32(tempBytes, 0, ref temp);

                    int count = Math.Min(64, messageLength - blockOffset);
                    for (int i = 0; i < count; i++)
                    {
                        ciphertext[16 + ciphertextOffset + blockOffset + i] = (byte)(message[messageOffset + blockOffset + i] ^ tempBytes[i]);
                    }

                    // Stepping by count (64 for every block but the last) ends exactly at
                    // messageLength and cannot overflow int.
                    blockOffset += count;
                }

                // compute MAC over the encrypted message, tag goes in front of it
                Poly1305Donna.poly1305_auth(ciphertext, ciphertextOffset, ciphertext, ciphertextOffset + 16, messageLength, ref poly1305Key);
            }
            finally
            {
                // The scratch buffer holds raw keystream; clear it on every exit path.
                CryptoBytes.Wipe(tempBytes);
            }
        }
        /// <summary>
        /// Updates internal state with data from the provided array. Full blocks are compressed
        /// immediately; a trailing partial block is kept in the internal buffer.
        /// </summary>
        /// <param name="data">Array of bytes</param>
        /// <param name="index">Offset of byte sequence</param>
        /// <param name="length">Sequence length</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="length"/> is negative.</exception>
        /// <exception cref="ArgumentException">The range <c>index .. index + length</c> exceeds the array.</exception>
        /// <exception cref="InvalidOperationException">The running byte total reaches <c>ulong.MaxValue / 8</c>.</exception>
        public void Update(byte[] data, int index, int length)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }
            if (index < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            if (length < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(length));
            }
            // Written as a subtraction on purpose: "index + length" can wrap around for large
            // values and slip past the check, while "data.Length - index" cannot (index >= 0).
            if (data.Length - index < length)
            {
                throw new ArgumentException("Requires index + length <= data.Length");
            }

            Array16<ulong> block;
            // Position inside the current block, taken before the total is advanced.
            int bytesInBuffer = (int)_totalBytes & (BlockSize - 1);

            _totalBytes += (uint)length;

            if (_totalBytes >= ulong.MaxValue / 8)
            {
                throw new InvalidOperationException("Too much data");
            }

            // Fill existing buffer
            if (bytesInBuffer != 0)
            {
                var toCopy = Math.Min(BlockSize - bytesInBuffer, length);
                Buffer.BlockCopy(data, index, _buffer, bytesInBuffer, toCopy);
                index         += toCopy;
                length        -= toCopy;
                bytesInBuffer += toCopy;
                if (bytesInBuffer == BlockSize)
                {
                    ByteIntegerConverter.Array16LoadBigEndian64(out block, _buffer, 0);
                    Sha512Internal.Core(out _state, ref _state, ref block);
                    CryptoBytes.InternalWipe(_buffer, 0, _buffer.Length);
                    bytesInBuffer = 0;
                }
            }

            // Hash complete blocks without copying
            while (length >= BlockSize)
            {
                ByteIntegerConverter.Array16LoadBigEndian64(out block, data, index);
                Sha512Internal.Core(out _state, ref _state, ref block);
                index  += BlockSize;
                length -= BlockSize;
            }

            // Copy remainder into buffer
            if (length > 0)
            {
                Buffer.BlockCopy(data, index, _buffer, bytesInBuffer, length);
            }
        }
        /// <summary>
        /// Runs the final compression step(s) and writes the eight 64-bit state words big-endian
        /// into <paramref name="output"/>, then clears the internal state.
        /// </summary>
        /// <param name="output">Destination span; must be exactly 64 bytes long.</param>
        /// <exception cref="ArgumentException"><paramref name="output"/> is not 64 bytes long.</exception>
        // NOTE(review): no padding is fed through Update before _buffer is loaded below, unlike the
        // ArraySegment-based finalizers - confirm whether that step is missing from this text.
        public void Finish(Span <byte> output)
            if (output.Length != 64)
                throw new ArgumentException("output.Count must be 64");

            Array16 <ulong> block;

            ByteIntegerConverter.Array16LoadBigEndian64(out block, _buffer);
            // Buffered input is no longer needed once loaded into block.
            CryptoBytes.Wipe(_buffer, 0, _buffer.Length);
            int bytesInBuffer = (int)_totalBytes & (BlockSize - 1);

            // More than BlockSize - 16 buffered bytes: compress this block and start an empty one.
            if (bytesInBuffer > BlockSize - 16)
                Sha512Internal.Core(out _state, in _state, in block);
                block = default;
            // NOTE(review): this assignment replaces the whole block loaded above with a value built
            // from the bit length alone; constructor arguments may be missing from this text - confirm
            // against the Array16 constructor before relying on it.
            block = new Array16 <ulong>(
                (_totalBytes - 1) * 8);

            Sha512Internal.Core(out _state, in _state, in block);

            // Emit the first eight state words, 8 bytes each, big-endian.
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(0), _state.x0);
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(8), _state.x1);
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(16), _state.x2);
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(24), _state.x3);
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(32), _state.x4);
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(40), _state.x5);
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(48), _state.x6);
            BinaryPrimitives.WriteUInt64BigEndian(output.Slice(56), _state.x7);
            // Do not keep the final state around after output has been produced.
            _state = default;
        /// <summary>
        /// Reads the first 32 bytes of <paramref name="source"/> as eight little-endian 32-bit words.
        /// </summary>
        /// <param name="source">Byte array; bytes 0 through 31 are read.</param>
        /// <returns>An <c>Array8&lt;uint&gt;</c> whose fields x0..x7 hold the words at byte offsets 0, 4, ..., 28.</returns>
        public static Array8<uint> ToArray8(this byte[] source)
        {
            Array8<uint> words;

            words.x0 = ByteIntegerConverter.LoadLittleEndian32(source, 0);
            words.x1 = ByteIntegerConverter.LoadLittleEndian32(source, 4);
            words.x2 = ByteIntegerConverter.LoadLittleEndian32(source, 8);
            words.x3 = ByteIntegerConverter.LoadLittleEndian32(source, 12);
            words.x4 = ByteIntegerConverter.LoadLittleEndian32(source, 16);
            words.x5 = ByteIntegerConverter.LoadLittleEndian32(source, 20);
            words.x6 = ByteIntegerConverter.LoadLittleEndian32(source, 24);
            words.x7 = ByteIntegerConverter.LoadLittleEndian32(source, 28);

            return words;
        }
        /// <summary>
        /// Runs 20-round <c>SalsaCore.HSalsa</c> once per 64 bytes of <paramref name="size"/>
        /// (rounded up), feeding each result back in as the next input. The result is discarded.
        /// </summary>
        /// <param name="size">Byte count that determines the number of iterations.</param>
        private static void HSalsa20Core(int size)
        {
            var input = new byte[]
            {
                6, 124, 83, 146, 38, 191, 9, 50, 4, 161, 47, 222, 122, 182, 223, 185,
                75, 27, 0, 216, 16, 122, 7, 89, 162, 104, 101, 147, 213, 21, 54, 95,
                225, 253, 139, 176, 105, 132, 23, 116, 76, 41, 176, 207, 221, 34, 157, 108,
                94, 94, 99, 52, 90, 117, 91, 220, 146, 190, 239, 143, 196, 176, 130, 186
            };

            Array16<UInt32> state;
            ByteIntegerConverter.Array16LoadLittleEndian32(out state, input, 0);

            int blockCount = (size + 63) / 64;
            for (int done = 0; done < blockCount; done++)
            {
                SalsaCore.HSalsa(out state, ref state, 20);
            }
        }
        /// <summary>
        /// Builds the 16-word Salsa state used for keystream generation from a 32-byte key and a
        /// 24-byte nonce. The first 16 nonce bytes are mixed with the key through 20-round
        /// <c>SalsaCore.HSalsa</c>; the remaining 8 nonce bytes go into the final state directly.
        /// </summary>
        /// <param name="internalKey">Receives the prepared state.</param>
        /// <param name="key">Key buffer; 32 bytes are read starting at <paramref name="keyOffset"/>.</param>
        /// <param name="keyOffset">Index of the first key byte.</param>
        /// <param name="nonce">Nonce buffer; 24 bytes are read starting at <paramref name="nonceOffset"/>.</param>
        /// <param name="nonceOffset">Index of the first nonce byte.</param>
        private static void PrepareInternalKey(out Array16 <UInt32> internalKey, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
            // Stage 1 input: constants at 0/5/10/15, key at 1-4 and 11-14, nonce bytes 0..15 at 6-9.
            internalKey.x0  = Salsa20.SalsaConst0;
            internalKey.x1  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 0);
            internalKey.x2  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 4);
            internalKey.x3  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 8);
            internalKey.x4  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 12);
            internalKey.x5  = Salsa20.SalsaConst1;
            internalKey.x6  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 0);
            internalKey.x7  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 4);
            internalKey.x8  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 8);
            internalKey.x9  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 12);
            internalKey.x10 = Salsa20.SalsaConst2;
            internalKey.x11 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 16);
            internalKey.x12 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 20);
            internalKey.x13 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 24);
            internalKey.x14 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 28);
            internalKey.x15 = Salsa20.SalsaConst3;
            SalsaCore.HSalsa(out internalKey, ref internalKey, 20);

            // Stage 2 key: words 0, 5, 10, 15, 6, 7, 8, 9 of the HSalsa result move into the key
            // slots. Order matters: every source word is read here before it is overwritten below.
            internalKey.x1  = internalKey.x0;
            internalKey.x2  = internalKey.x5;
            internalKey.x3  = internalKey.x10;
            internalKey.x4  = internalKey.x15;
            internalKey.x11 = internalKey.x6;
            internalKey.x12 = internalKey.x7;
            internalKey.x13 = internalKey.x8;
            internalKey.x14 = internalKey.x9;

            // Restore the constants now that their slots have been copied out.
            internalKey.x0  = Salsa20.SalsaConst0;
            internalKey.x5  = Salsa20.SalsaConst1;
            internalKey.x10 = Salsa20.SalsaConst2;
            internalKey.x15 = Salsa20.SalsaConst3;

            // Remaining nonce bytes 16..23.
            internalKey.x6 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 16);
            internalKey.x7 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 20);

            // Words 8 and 9 start at zero; presumably the block counter - confirm against callers.
            internalKey.x8 = 0;
            internalKey.x9 = 0;
        // NOTE(review): the text of this method appears damaged. The generic arguments of the
        // ArraySegment parameters are missing, and everything between the start of the for loop and
        // the Salsa state setup below is gone, so what follows the loop header looks like the tail
        // of a different routine. Restore it from version control before relying on this text.
        public static void GetPublicKey(ArraySegment\\ publicKey, ArraySegment\\ privateKey)
            if (publicKey.Array == null)
                throw new ArgumentNullException("publicKey.Array");
            if (privateKey.Array == null)
                throw new ArgumentNullException("privateKey.Array");
            // NOTE(review): this check is on publicKey but the message names privateKey - looks like a copy/paste slip.
            if (publicKey.Count != PublicKeySizeInBytes)
                throw new ArgumentException("privateKey.Count must be 32");
            if (privateKey.Count != PrivateKeySizeInBytes)
                throw new ArgumentException("privateKey.Count must be 32");

            // hack: abusing publicKey as temporary storage
            // todo: remove hack
            for (int i = 0; i \\ salsaState;
            // Salsa input: c0..c3 at words 0/5/10/15, zeros at 2/7/8/13, eight words read from
            // sharedKey in the remaining slots.
            salsaState.x0 = c0;
            salsaState.x1 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 0);
            salsaState.x2 = 0;
            salsaState.x3 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 4);
            salsaState.x4 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 8);
            salsaState.x5 = c1;
            salsaState.x6 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 12);
            salsaState.x7 = 0;
            salsaState.x8 = 0;
            salsaState.x9 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 16);
            salsaState.x10 = c2;
            salsaState.x11 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 20);
            salsaState.x12 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 24);
            salsaState.x13 = 0;
            salsaState.x14 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 28);
            salsaState.x15 = c3;
            SalsaCore.Salsa(out salsaState, ref salsaState, 20);

            // The first eight output words overwrite the 32 bytes of sharedKey in place.
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 0, salsaState.x0);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 4, salsaState.x1);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 8, salsaState.x2);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 12, salsaState.x3);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 16, salsaState.x4);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 20, salsaState.x5);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 24, salsaState.x6);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 28, salsaState.x7);
        /// <summary>
        /// Absorbs <paramref name="data"/> into the hash state. Full blocks are compressed
        /// immediately; a trailing partial block is kept in the internal buffer for the next call.
        /// </summary>
        /// <param name="data">Bytes to absorb.</param>
        /// <exception cref="InvalidOperationException">The running byte total reaches <c>ulong.MaxValue / 8</c>.</exception>
        public void Update(ReadOnlySpan<byte> data)
        {
            Array16<ulong> block;
            // Position inside the current block, taken before the total is advanced.
            int bytesInBuffer = (int)_totalBytes & (BlockSize - 1);

            _totalBytes += (uint)data.Length;
            var bufferSpan = _buffer.AsSpan();

            if (_totalBytes >= ulong.MaxValue / 8)
            {
                throw new InvalidOperationException("Too much data");
            }

            // Fill existing buffer
            if (bytesInBuffer != 0)
            {
                var toCopy = Math.Min(BlockSize - bytesInBuffer, data.Length);
                data.Slice(0, toCopy).CopyTo(bufferSpan.Slice(bytesInBuffer));
                data = data.Slice(toCopy);

                bytesInBuffer += toCopy;
                if (bytesInBuffer == BlockSize)
                {
                    ByteIntegerConverter.Array16LoadBigEndian64(out block, _buffer);
                    Sha512Internal.Core(out _state, in _state, in block);
                    CryptoBytes.Wipe(_buffer, 0, _buffer.Length);
                    bytesInBuffer = 0;
                }
            }

            // Hash complete blocks without copying
            while (data.Length >= BlockSize)
            {
                ByteIntegerConverter.Array16LoadBigEndian64(out block, data);
                Sha512Internal.Core(out _state, in _state, in block);
                data = data.Slice(BlockSize);
            }

            // Copy remainder into buffer. Without this the trailing partial block would be
            // counted in _totalBytes but never hashed.
            if (data.Length > 0)
            {
                data.CopyTo(bufferSpan.Slice(bytesInBuffer));
            }
        }
        /// <summary>
        /// Computes the Poly1305 tag of <paramref name="message"/> under <paramref name="key"/>.
        /// </summary>
        /// <param name="message">Data to authenticate.</param>
        /// <param name="key">Key; must be 32 bytes.</param>
        /// <returns>A newly allocated 16-byte tag.</returns>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="key"/> is not 32 bytes long.</exception>
        public override byte[] Sign(byte[] message, byte[] key)
        {
            if (message == null)
            {
                throw new ArgumentNullException("message");
            }
            if (key == null)
            {
                throw new ArgumentNullException("key");
            }
            if (key.Length != 32)
            {
                throw new ArgumentException("Invalid key size", "key");
            }

            var result = new byte[16];
            Array8<UInt32> internalKey;

            ByteIntegerConverter.Array8LoadLittleEndian32(out internalKey, key, 0);
            Poly1305Donna.poly1305_auth(result, 0, message, 0, message.Length, ref internalKey);

            // Hand the computed tag back to the caller; the method is declared to return it.
            return result;
        }
        /// <summary>
        /// Hashes a 32-byte shared key in place, in the way the Curve25519 paper describes: the key
        /// words are placed into a Salsa state together with the constant "Curve25519output",
        /// 20-round Salsa is applied, and the first eight output words replace the key.
        /// </summary>
        /// <param name="sharedKey">Buffer holding the key; 32 bytes are read and then overwritten.</param>
        /// <param name="offset">Index of the first key byte.</param>
        internal static void KeyExchangeOutputHashCurve25519Paper(byte[] sharedKey, int offset)
        {
            // "Curve25519output" packed as four little-endian 32-bit words.
            const UInt32 c0 = 'C' | 'u' << 8 | 'r' << 16 | (UInt32)'v' << 24;
            const UInt32 c1 = 'e' | '2' << 8 | '5' << 16 | (UInt32)'5' << 24;
            const UInt32 c2 = '1' | '9' << 8 | 'o' << 16 | (UInt32)'u' << 24;
            const UInt32 c3 = 't' | 'p' << 8 | 'u' << 16 | (UInt32)'t' << 24;

            // Constants at 0/5/10/15, zeros at 2/7/8/13, key words in the remaining slots.
            var salsaState = new Array16<UInt32>
            {
                x0 = c0,
                x1 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 0),
                x2 = 0,
                x3 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 4),
                x4 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 8),
                x5 = c1,
                x6 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 12),
                x7 = 0,
                x8 = 0,
                x9 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 16),
                x10 = c2,
                x11 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 20),
                x12 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 24),
                x13 = 0,
                x14 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 28),
                x15 = c3
            };

            SalsaCore.Salsa(out salsaState, ref salsaState, 20);

            // Overwrite the key with output words 0..7.
            int at = offset;
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 0, salsaState.x0);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 4, salsaState.x1);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 8, salsaState.x2);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 12, salsaState.x3);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 16, salsaState.x4);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 20, salsaState.x5);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 24, salsaState.x6);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, at + 28, salsaState.x7);
        }
        /// <summary>
        /// Applies the 20-round Salsa core once to a fixed 64-byte input and compares the result
        /// with a fixed expected vector.
        /// </summary>
        public void Salsa20()
        {
            var input = new byte[]
            {
                211, 159, 13, 115, 76, 55, 82, 183, 3, 117, 222, 37, 191, 187, 234, 136,
                49, 237, 179, 48, 1, 106, 178, 219, 175, 199, 166, 48, 86, 16, 179, 207,
                31, 240, 32, 63, 15, 83, 93, 161, 116, 147, 48, 113, 238, 55, 204, 36,
                79, 201, 235, 79, 3, 81, 156, 47, 203, 26, 244, 243, 88, 118, 104, 54
            };
            var expectedOutput = new byte[]
            {
                109, 42, 178, 168, 156, 240, 248, 238, 168, 196, 190, 203, 26, 110, 170, 154,
                29, 29, 150, 26, 150, 30, 235, 249, 190, 163, 251, 48, 69, 144, 51, 57,
                118, 40, 152, 157, 180, 57, 27, 94, 107, 42, 236, 35, 27, 111, 114, 114,
                219, 236, 232, 135, 111, 155, 110, 18, 24, 232, 95, 158, 179, 19, 48, 202
            };

            Array16<UInt32> before;
            ByteIntegerConverter.Array16LoadLittleEndian32(out before, input, 0);

            Array16<UInt32> after;
            SalsaCore.Salsa(out after, ref before, 20);

            var actualOutput = new byte[64];
            ByteIntegerConverter.Array16StoreLittleEndian32(actualOutput, 0, ref after);

            TestHelpers.AssertEqualBytes(expectedOutput, actualOutput);
        }
        // written by floodyberry (Andrew M.)
        // original license: MIT or PUBLIC DOMAIN
        // https://github.com/floodyberry/poly1305-donna/blob/master/poly1305-donna-unrolled.c
        // Computes a 16-byte Poly1305 tag over m[mStart .. mStart + mLength) and writes it to
        // output[outputOffset .. outputOffset + 16).
        //
        // Parameters:
        //   output, outputOffset - destination buffer and start index; four 32-bit words are stored
        //                          at offsets +0, +4, +8 and +12.
        //   m, mStart, mLength   - message buffer, start index and number of bytes to process.
        //   key                  - eight 32-bit words: x0..x3 are masked into the multiplier r,
        //                          x4..x7 are added to the accumulator at the very end.
        //
        // The accumulator h and the multiplier r are held as five 26-bit limbs. A carry out of the
        // top limb is folded back into h0 multiplied by 5, i.e. the arithmetic is modulo 2^130 - 5.
        //
        // NOTE(review): the goto statements below name poly1305_donna_16bytes, poly1305_donna_mul,
        // poly1305_donna_atmost15bytes and poly1305_donna_finish, but no such labels (nor any braces)
        // appear in this text - presumably lost in extraction; confirm against the upstream source.
        public static void poly1305_auth(byte[] output, int outputOffset, byte[] m, int mStart, int mLength, ref Array8 <UInt32> key)
            UInt32 t0, t1, t2, t3;
            UInt32 h0, h1, h2, h3, h4;
            UInt32 r0, r1, r2, r3, r4;
            UInt32 s1, s2, s3, s4;
            UInt32 b, nb;
            int    j;
            UInt64 tt0, tt1, tt2, tt3, tt4;
            UInt64 f0, f1, f2, f3;
            UInt32 g0, g1, g2, g3, g4;
            UInt64 c;

            /* clamp key */
            t0 = key.x0;
            t1 = key.x1;
            t2 = key.x2;
            t3 = key.x3;

            /* precompute multipliers */
            // split the four key words into five limbs r0..r4; the masks also clear the clamped bits
            r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6;
            r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12;
            r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18;
            r3 = t2 & 0x3f03fff; t3 >>= 8;
            r4 = t3 & 0x00fffff;

            // r1..r4 pre-multiplied by 5, used for the product terms that wrap past the top limb
            s1 = r1 * 5;
            s2 = r2 * 5;
            s3 = r3 * 5;
            s4 = r4 * 5;

            /* init state */
            h0 = 0;
            h1 = 0;
            h2 = 0;
            h3 = 0;
            h4 = 0;

            /* full blocks */
            if (mLength < 16)
                goto poly1305_donna_atmost15bytes;

            // consume one 16-byte block; the loads below index backwards from the advanced mStart
            mStart  += 16;
            mLength -= 16;

            t0 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 16);
            t1 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 12);
            t2 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 8);
            t3 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 4);

            //todo: looks like these can be simplified a bit
            // split the 128-bit block into five 26-bit pieces and add them to h; the top limb starts
            // at bit 104, so (1 << 24) there sets bit 128 of the block
            h0 += t0 & 0x3ffffff;
            h1 += (uint)(((((UInt64)t1 << 32) | t0) >> 26) & 0x3ffffff);
            h2 += (uint)(((((UInt64)t2 << 32) | t1) >> 20) & 0x3ffffff);
            h3 += (uint)(((((UInt64)t3 << 32) | t2) >> 14) & 0x3ffffff);
            h4 += (t3 >> 8) | (1 << 24);

            // h = h * r: limb-by-limb products collected in 64-bit accumulators
            tt0 = (ulong)h0 * r0 + (ulong)h1 * s4 + (ulong)h2 * s3 + (ulong)h3 * s2 + (ulong)h4 * s1;
            tt1 = (ulong)h0 * r1 + (ulong)h1 * r0 + (ulong)h2 * s4 + (ulong)h3 * s3 + (ulong)h4 * s2;
            tt2 = (ulong)h0 * r2 + (ulong)h1 * r1 + (ulong)h2 * r0 + (ulong)h3 * s4 + (ulong)h4 * s3;
            tt3 = (ulong)h0 * r3 + (ulong)h1 * r2 + (ulong)h2 * r1 + (ulong)h3 * r0 + (ulong)h4 * s4;
            tt4 = (ulong)h0 * r4 + (ulong)h1 * r3 + (ulong)h2 * r2 + (ulong)h3 * r1 + (ulong)h4 * r0;

                // carry propagation: keep 26 bits per limb and push the excess into the next limb
                h0   = (UInt32)tt0 & 0x3ffffff; c = (tt0 >> 26);
                tt1 += c; h1 = (UInt32)tt1 & 0x3ffffff; b = (UInt32)(tt1 >> 26);
                tt2 += b; h2 = (UInt32)tt2 & 0x3ffffff; b = (UInt32)(tt2 >> 26);
                tt3 += b; h3 = (UInt32)tt3 & 0x3ffffff; b = (UInt32)(tt3 >> 26);
                tt4 += b; h4 = (UInt32)tt4 & 0x3ffffff; b = (UInt32)(tt4 >> 26);
            // the carry out of the top limb wraps around into h0, multiplied by 5
            h0 += b * 5;

            // more full blocks remain
            if (mLength >= 16)
                goto poly1305_donna_16bytes;

            /* final bytes */
            if (mLength == 0)
                goto poly1305_donna_finish;

            byte[] mp = new byte[16];//todo remove allocation

            // copy the remaining bytes, append a single 1 byte, zero-fill the rest of the block
            for (j = 0; j < mLength; j++)
                mp[j] = m[mStart + j];
            mp[j++] = 1;
            for (; j < 16; j++)
                mp[j] = 0;
            mLength = 0;

            t0 = ByteIntegerConverter.LoadLittleEndian32(mp, 0);
            t1 = ByteIntegerConverter.LoadLittleEndian32(mp, 4);
            t2 = ByteIntegerConverter.LoadLittleEndian32(mp, 8);
            t3 = ByteIntegerConverter.LoadLittleEndian32(mp, 12);

            // same limb split as for a full block, but without the extra (1 << 24) bit
            h0 += t0 & 0x3ffffff;
            h1 += (uint)(((((UInt64)t1 << 32) | t0) >> 26) & 0x3ffffff);
            h2 += (uint)(((((UInt64)t2 << 32) | t1) >> 20) & 0x3ffffff);
            h3 += (uint)(((((UInt64)t3 << 32) | t2) >> 14) & 0x3ffffff);
            h4 += t3 >> 8;

            // presumably re-enters the multiply/carry step for this padded block; mLength is now 0
            goto poly1305_donna_mul;

            // final full carry pass over h
            b   = h0 >> 26; h0 = h0 & 0x3ffffff;
            h1 += b; b = h1 >> 26; h1 = h1 & 0x3ffffff;
            h2 += b; b = h2 >> 26; h2 = h2 & 0x3ffffff;
            h3 += b; b = h3 >> 26; h3 = h3 & 0x3ffffff;
            h4 += b; b = h4 >> 26; h4 = h4 & 0x3ffffff;
            h0 += b * 5;

            // g = h + 5 - 2^130 (the 2^130 is the (1 << 26) subtracted from the top limb)
            g0 = h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff;
            g1 = h1 + b; b = g1 >> 26; g1 &= 0x3ffffff;
            g2 = h2 + b; b = g2 >> 26; g2 &= 0x3ffffff;
            g3 = h3 + b; b = g3 >> 26; g3 &= 0x3ffffff;
            g4 = unchecked (h4 + b - (1 << 26));

            // b is all ones when bit 31 of g4 is clear (the subtraction did not wrap), zero otherwise;
            // the masks then pick g or h without branching
            b  = (g4 >> 31) - 1;
            nb = ~b;
            h0 = (h0 & nb) | (g0 & b);
            h1 = (h1 & nb) | (g1 & b);
            h2 = (h2 & nb) | (g2 & b);
            h3 = (h3 & nb) | (g3 & b);
            h4 = (h4 & nb) | (g4 & b);

            // repack the 26-bit limbs into four 32-bit words and add key.x4..x7 in 64-bit arithmetic
            f0 = ((h0) | (h1 << 26)) + (UInt64)key.x4;
            f1 = ((h1 >> 6) | (h2 << 20)) + (UInt64)key.x5;
            f2 = ((h2 >> 12) | (h3 << 14)) + (UInt64)key.x6;
            f3 = ((h3 >> 18) | (h4 << 8)) + (UInt64)key.x7;

                // store the low 32 bits of each word and carry the high part into the next one
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 0, (uint)f0); f1 += (f0 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 4, (uint)f1); f2 += (f1 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 8, (uint)f2); f3 += (f2 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 12, (uint)f3);
        // written by floodyberry (Andrew M.)
        // original license: MIT or PUBLIC DOMAIN
        // https://github.com/floodyberry/poly1305-donna/blob/master/poly1305-donna-unrolled.c
        // Computes a 16-byte Poly1305 tag over m[mStart .. mStart + mLength) and writes it to
        // output[outputOffset .. outputOffset + 16).
        //
        // Parameters:
        //   output, outputOffset - destination buffer and start index; four 32-bit words are stored
        //                          at offsets +0, +4, +8 and +12.
        //   m, mStart, mLength   - message buffer, start index and number of bytes to process.
        //   key                  - eight 32-bit words: x0..x3 are masked into the multiplier r,
        //                          x4..x7 are added to the accumulator at the very end.
        //
        // The accumulator h and the multiplier r are held as five 26-bit limbs. A carry out of the
        // top limb is folded back into h0 multiplied by 5, i.e. the arithmetic is modulo 2^130 - 5.
        //
        // NOTE(review): the goto statements below name poly1305_donna_16bytes, poly1305_donna_mul,
        // poly1305_donna_atmost15bytes and poly1305_donna_finish, but no such labels (nor any braces)
        // appear in this text - presumably lost in extraction; confirm against the upstream source.
        public static void poly1305_auth(byte[] output, int outputOffset, byte[] m, int mStart, int mLength,
                                         ref Array8 <uint> key)
            uint b;
            int  j;

            /* clamp key */
            var t0 = key.x0;
            var t1 = key.x1;
            var t2 = key.x2;
            var t3 = key.x3;

            /* precompute multipliers */
            // split the four key words into five limbs r0..r4; the masks also clear the clamped bits
            var r0 = t0 & 0x3ffffff;

            t0 >>= 26;
            t0  |= t1 << 6;
            var r1 = t0 & 0x3ffff03;

            t1 >>= 20;
            t1  |= t2 << 12;
            var r2 = t1 & 0x3ffc0ff;

            t2 >>= 14;
            t2  |= t3 << 18;
            var r3 = t2 & 0x3f03fff;

            t3 >>= 8;
            var r4 = t3 & 0x00fffff;

            // r1..r4 pre-multiplied by 5, used for the product terms that wrap past the top limb
            var s1 = r1 * 5;
            var s2 = r2 * 5;
            var s3 = r3 * 5;
            var s4 = r4 * 5;

            /* init state */
            uint h0 = 0;
            uint h1 = 0;
            uint h2 = 0;
            uint h3 = 0;
            uint h4 = 0;

            /* full blocks */
            if (mLength < 16)
                goto poly1305_donna_atmost15bytes;

            // consume one 16-byte block; the loads below index backwards from the advanced mStart
            mStart  += 16;
            mLength -= 16;

            t0 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 16);
            t1 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 12);
            t2 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 8);
            t3 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 4);

            // split the 128-bit block into five 26-bit pieces and add them to h; the top limb starts
            // at bit 104, so (1 << 24) there sets bit 128 of the block
            h0 += t0 & 0x3ffffff;
            h1 += (uint)(((((ulong)t1 << 32) | t0) >> 26) & 0x3ffffff);
            h2 += (uint)(((((ulong)t2 << 32) | t1) >> 20) & 0x3ffffff);
            h3 += (uint)(((((ulong)t3 << 32) | t2) >> 14) & 0x3ffffff);
            h4 += (t3 >> 8) | (1 << 24);

            // h = h * r: limb-by-limb products collected in 64-bit accumulators
            var tt0 = (ulong)h0 * r0 + (ulong)h1 * s4 + (ulong)h2 * s3 + (ulong)h3 * s2 + (ulong)h4 * s1;
            var tt1 = (ulong)h0 * r1 + (ulong)h1 * r0 + (ulong)h2 * s4 + (ulong)h3 * s3 + (ulong)h4 * s2;
            var tt2 = (ulong)h0 * r2 + (ulong)h1 * r1 + (ulong)h2 * r0 + (ulong)h3 * s4 + (ulong)h4 * s3;
            var tt3 = (ulong)h0 * r3 + (ulong)h1 * r2 + (ulong)h2 * r1 + (ulong)h3 * r0 + (ulong)h4 * s4;
            var tt4 = (ulong)h0 * r4 + (ulong)h1 * r3 + (ulong)h2 * r2 + (ulong)h3 * r1 + (ulong)h4 * r0;

                // carry propagation: keep 26 bits per limb and push the excess into the next limb
                h0 = (uint)tt0 & 0x3ffffff;
                var c = tt0 >> 26;
                tt1 += c;
                h1   = (uint)tt1 & 0x3ffffff;
                b    = (uint)(tt1 >> 26);
                tt2 += b;
                h2   = (uint)tt2 & 0x3ffffff;
                b    = (uint)(tt2 >> 26);
                tt3 += b;
                h3   = (uint)tt3 & 0x3ffffff;
                b    = (uint)(tt3 >> 26);
                tt4 += b;
                h4   = (uint)tt4 & 0x3ffffff;
                b    = (uint)(tt4 >> 26);

            // the carry out of the top limb wraps around into h0, multiplied by 5
            h0 += b * 5;

            // more full blocks remain
            if (mLength >= 16)
                goto poly1305_donna_16bytes;

            /* final bytes */
            if (mLength == 0)
                goto poly1305_donna_finish;

            var mp = new byte[16];

            // copy the remaining bytes, append a single 1 byte, zero-fill the rest of the block
            for (j = 0; j < mLength; j++)
                mp[j] = m[mStart + j];
            mp[j++] = 1;
            for (; j < 16; j++)
                mp[j] = 0;
            mLength = 0;

            t0 = ByteIntegerConverter.LoadLittleEndian32(mp, 0);
            t1 = ByteIntegerConverter.LoadLittleEndian32(mp, 4);
            t2 = ByteIntegerConverter.LoadLittleEndian32(mp, 8);
            t3 = ByteIntegerConverter.LoadLittleEndian32(mp, 12);

            // same limb split as for a full block, but without the extra (1 << 24) bit
            h0 += t0 & 0x3ffffff;
            h1 += (uint)(((((ulong)t1 << 32) | t0) >> 26) & 0x3ffffff);
            h2 += (uint)(((((ulong)t2 << 32) | t1) >> 20) & 0x3ffffff);
            h3 += (uint)(((((ulong)t3 << 32) | t2) >> 14) & 0x3ffffff);
            h4 += t3 >> 8;

            // presumably re-enters the multiply/carry step for this padded block; mLength is now 0
            goto poly1305_donna_mul;

            // final full carry pass over h
            b   = h0 >> 26;
            h0  = h0 & 0x3ffffff;
            h1 += b;
            b   = h1 >> 26;
            h1  = h1 & 0x3ffffff;
            h2 += b;
            b   = h2 >> 26;
            h2  = h2 & 0x3ffffff;
            h3 += b;
            b   = h3 >> 26;
            h3  = h3 & 0x3ffffff;
            h4 += b;
            b   = h4 >> 26;
            h4  = h4 & 0x3ffffff;
            h0 += b * 5;

            // g = h + 5 - 2^130 (the 2^130 is the (1 << 26) subtracted from the top limb)
            var g0 = h0 + 5;

            b   = g0 >> 26;
            g0 &= 0x3ffffff;
            var g1 = h1 + b;

            b   = g1 >> 26;
            g1 &= 0x3ffffff;
            var g2 = h2 + b;

            b   = g2 >> 26;
            g2 &= 0x3ffffff;
            var g3 = h3 + b;

            b   = g3 >> 26;
            g3 &= 0x3ffffff;
            var g4 = unchecked (h4 + b - (1 << 26));

            // b is all ones when bit 31 of g4 is clear (the subtraction did not wrap), zero otherwise;
            // the masks then pick g or h without branching
            b = (g4 >> 31) - 1;
            var nb = ~b;

            h0 = (h0 & nb) | (g0 & b);
            h1 = (h1 & nb) | (g1 & b);
            h2 = (h2 & nb) | (g2 & b);
            h3 = (h3 & nb) | (g3 & b);
            h4 = (h4 & nb) | (g4 & b);

            // repack the 26-bit limbs into four 32-bit words and add key.x4..x7 in 64-bit arithmetic
            var f0 = (h0 | (h1 << 26)) + (ulong)key.x4;
            var f1 = ((h1 >> 6) | (h2 << 20)) + (ulong)key.x5;
            var f2 = ((h2 >> 12) | (h3 << 14)) + (ulong)key.x6;
            var f3 = ((h3 >> 18) | (h4 << 8)) + (ulong)key.x7;

                // store the low 32 bits of each word and carry the high part into the next one
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 0, (uint)f0);
                f1 += f0 >> 32;
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 4, (uint)f1);
                f2 += f1 >> 32;
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 8, (uint)f2);
                f3 += f2 >> 32;
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 12, (uint)f3);
        // Checks the MAC stored at ciphertext[ciphertextOffset ..] against a Poly1305 tag computed over
        // the bytes that follow it; on mismatch the plaintext range is cleared and false is returned.
        //
        // NOTE(review): this text is damaged. The generic arguments after Array16 / Array8 are replaced
        // by "\\", braces are missing, and the first for-loop header runs straight into the
        // declarations of what looks like a second routine (it uses messageLength, which is not a
        // parameter here). Restore from the original file before relying on anything below.
        private static bool DecryptInternal(byte[] plaintext, int plaintextOffset, byte[] ciphertext, int ciphertextOffset, int ciphertextLength, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
            // the ciphertext carries the MAC in front of the encrypted payload
            int plaintextLength = ciphertextLength - MacSizeInBytes;
            Array16\\ internalKey;
            PrepareInternalKey(out internalKey, key, keyOffset, nonce, nonceOffset);

            Array16\\ temp;
            var tempBytes = new byte[64];//todo: remove allocation

            // first iteration
                SalsaCore.Salsa(out temp, ref internalKey, 20);

                //first half is for Poly1305
                Array8\\ poly1305Key;
                poly1305Key.x0 = temp.x0;
                poly1305Key.x1 = temp.x1;
                poly1305Key.x2 = temp.x2;
                poly1305Key.x3 = temp.x3;
                poly1305Key.x4 = temp.x4;
                poly1305Key.x5 = temp.x5;
                poly1305Key.x6 = temp.x6;
                poly1305Key.x7 = temp.x7;

                // compute MAC
                // the tag is computed over the bytes after the 16-byte MAC and compared in constant time
                Poly1305Donna.poly1305_auth(tempBytes, 0, ciphertext, ciphertextOffset + 16, plaintextLength, ref poly1305Key);
                if (!CryptoBytes.ConstantTimeEquals(tempBytes, 0, ciphertext, ciphertextOffset, MacSizeInBytes))
                    Array.Clear(plaintext, plaintextOffset, plaintextLength);
                    return false;

                // rest for the message
                // words x8..x15 of the first block are serialized into tempBytes[0..32)
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 0, temp.x8);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 4, temp.x9);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 8, temp.x10);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 12, temp.x11);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 16, temp.x12);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 20, temp.x13);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 24, temp.x14);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 28, temp.x15);
                int count = Math.Min(32, plaintextLength);
                // NOTE(review): the next line is fused/truncated - the loop condition and body are lost
                for (int i = 0; i \\ internalKey;
            // NOTE(review): from here on the statements refer to messageLength; presumably this is
            // the body of a separate (encrypting?) routine whose header was lost - confirm.
            PrepareInternalKey(out internalKey, key, keyOffset, nonce, nonceOffset);

            Array16\\ temp;
            var tempBytes = new byte[64];//todo: remove allocation
            Array8\\ poly1305Key;

            // first iteration
                SalsaCore.Salsa(out temp, ref internalKey, 20);

                //first half is for Poly1305
                poly1305Key.x0 = temp.x0;
                poly1305Key.x1 = temp.x1;
                poly1305Key.x2 = temp.x2;
                poly1305Key.x3 = temp.x3;
                poly1305Key.x4 = temp.x4;
                poly1305Key.x5 = temp.x5;
                poly1305Key.x6 = temp.x6;
                poly1305Key.x7 = temp.x7;

                // second half for the message
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 0, temp.x8);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 4, temp.x9);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 8, temp.x10);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 12, temp.x11);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 16, temp.x12);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 20, temp.x13);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 24, temp.x14);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 28, temp.x15);
                int count = Math.Min(32, messageLength);
                // NOTE(review): truncated - the remainder of this loop is not present in the text
                for (int i = 0; i \
        private static bool DecryptInternal(byte[] plaintext, int plaintextOffset, byte[] ciphertext,
                                            int ciphertextOffset, int ciphertextLength, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
            int            plaintextLength = ciphertextLength - MacSizeInBytes;
            Array16 <uint> internalKey;

            PrepareInternalKey(out internalKey, key, keyOffset, nonce, nonceOffset);

            Array16 <uint> temp;

            byte[] tempBytes = new byte[64]; //todo: remove allocation

            // first iteration
                SalsaCore.HSalsa(out temp, ref internalKey, 20);

                //first half is for Poly1305
                Array8 <uint> poly1305Key;
                poly1305Key.x0 = temp.x0;
                poly1305Key.x1 = temp.x1;
                poly1305Key.x2 = temp.x2;
                poly1305Key.x3 = temp.x3;
                poly1305Key.x4 = temp.x4;
                poly1305Key.x5 = temp.x5;
                poly1305Key.x6 = temp.x6;
                poly1305Key.x7 = temp.x7;

                // compute MAC
                Poly1305Donna.poly1305_auth(tempBytes, 0, ciphertext, ciphertextOffset + 16, plaintextLength,
                                            ref poly1305Key);

                if (!CryptoBytes.ConstantTimeEquals(tempBytes, 0, ciphertext, ciphertextOffset, MacSizeInBytes))
                    Array.Clear(plaintext, plaintextOffset, plaintextLength);

                // rest for the message
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 0, temp.x8);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 4, temp.x9);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 8, temp.x10);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 12, temp.x11);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 16, temp.x12);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 20, temp.x13);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 24, temp.x14);
                ByteIntegerConverter.StoreLittleEndian32(tempBytes, 28, temp.x15);
                int count = Math.Min(32, plaintextLength);

                for (int i = 0; i < count; i++)
                    plaintext[plaintextOffset + i] =
                        (byte)(ciphertext[MacSizeInBytes + ciphertextOffset + i] ^ tempBytes[i]);

            // later iterations
            int blockOffset = 32;

            while (blockOffset < plaintextLength)
                SalsaCore.HSalsa(out temp, ref internalKey, 20);
                ByteIntegerConverter.Array16StoreLittleEndian32(tempBytes, 0, ref temp);
                int count = Math.Min(64, plaintextLength - blockOffset);

                for (int i = 0; i < count; i++)
                    plaintext[plaintextOffset + blockOffset + i] =
                        (byte)(ciphertext[16 + ciphertextOffset + blockOffset + i] ^ tempBytes[i]);

                blockOffset += 64;
