// Source: Unity_Web/Assets/Best HTTP/Source/Connections/TLS/Crypto/Impl/FastAesEngineHelper.cs (C#; repository viewer reported 196 lines, 7.4 KiB)

#if !BESTHTTP_DISABLE_ALTERNATE_SSL && (!UNITY_WEBGL || UNITY_EDITOR)
using System;
using BestHTTP.SecureProtocol.Org.BouncyCastle.Crypto.Utilities;
namespace BestHTTP.Connections.TLS.Crypto.Impl
{
/// <summary>
/// Static helper holding a pointer-based (unsafe) single-block encryption routine.
/// All tables and round keys are supplied by the caller; this class keeps no state.
/// As the class name indicates, the routine is intended for AES block encryption.
/// </summary>
internal static class FastAesEngineHelper
{
/// <summary>
/// Encrypts one 16-byte block. Four 32-bit words are read from <paramref name="input"/>,
/// XORed with the first round key, run through table-driven rounds that use
/// <paramref name="T0"/> and the round keys in <paramref name="KW"/>, passed through a final
/// round that uses the byte tables <paramref name="S"/> and <paramref name="s"/>, and the
/// four resulting words are written to <paramref name="output"/>.
/// </summary>
/// <remarks>
/// This method performs no explicit argument validation. The tables and round keys are
/// accessed through pinned pointers, which are not bounds-checked, so the caller must
/// guarantee the sizes described on each parameter.
/// </remarks>
/// <param name="input">Source buffer. Words are read at inOff + 0, + 4, + 8 and + 12 via Pack.LE_To_UInt32.</param>
/// <param name="inOff">Offset in <paramref name="input"/> of the first word of the block.</param>
/// <param name="output">Destination buffer. Words are written at outOff + 0, + 4, + 8 and + 12 via Pack.UInt32_To_LE.</param>
/// <param name="outOff">Offset in <paramref name="output"/> of the first word of the result.</param>
/// <param name="KW">Round keys. Elements [0]..[3] of each used entry are read; for an even
/// <paramref name="ROUNDS"/> the entries KW[0] through KW[ROUNDS] are used.</param>
/// <param name="ROUNDS">Round count that bounds the unrolled loop.
/// NOTE(review): the loop consumes two round keys per iteration, so the last key read is
/// KW[ROUNDS] only when ROUNDS is even - confirm callers never pass an odd value.</param>
/// <param name="T0">Lookup table of 32-bit words indexed by a byte value, so it must hold at least 256 entries.</param>
/// <param name="S">Byte table indexed by a byte value in the final round; must hold at least 256 entries.</param>
/// <param name="s">Second byte table indexed by a byte value in the final round; must hold at least 256 entries.
/// NOTE(review): presumably holds the same contents as <paramref name="S"/> - confirm against the caller.</param>
public unsafe static void EncryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW, int ROUNDS, uint[] T0, byte[] S, byte[] s)
{
// Load the block as four 32-bit words (little-endian, per the helper's name).
uint C0 = Pack.LE_To_UInt32(input, inOff + 0);
uint C1 = Pack.LE_To_UInt32(input, inOff + 4);
uint C2 = Pack.LE_To_UInt32(input, inOff + 8);
uint C3 = Pack.LE_To_UInt32(input, inOff + 12);
// Initial round-key addition with KW[0].
uint[] kw = KW[0];
uint t0 = C0 ^ kw[0];
uint t1 = C1 ^ kw[1];
uint t2 = C2 ^ kw[2];
// r3 serves as the fourth state word for BOTH the t* and the r* sets: every round reads
// its old value first and overwrites it in the round's last statement. Do not reorder.
uint r0, r1, r2, r3 = C3 ^ kw[3];
int r = 1;
// idx is a byte, so the (byte) casts below keep only the low 8 bits of the shifted word
// and every table index stays in the range 0..255.
byte idx;
uint tmp1, tmp2, tmp3;
fixed (uint* pT0 = T0)
{
// Two rounds per iteration: the first maps (t0, t1, t2, r3) to (r0, r1, r2, r3), the
// second maps them back to (t0, t1, t2, r3). r advances by 2 per iteration.
while (r < ROUNDS - 1)
{
kw = KW[r++];
fixed (uint* pkw = kw)
{
// Pattern for output word i (word indices taken modulo 4):
//   T0[byte 0 of word i]
//   ^ T0[byte 1 of word i+1] rotated left by 8   (tmp1)
//   ^ T0[byte 2 of word i+2] rotated by 16       (tmp2)
//   ^ T0[byte 3 of word i+3] rotated right by 8  (tmp3)
//   ^ round key word i
// The rotations derive the other three table variants from the single table T0.
idx = (byte)(t1 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(t2 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(r3 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r0 = pT0[t0 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[0];
idx = (byte)(t2 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(r3 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(t0 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r1 = pT0[t1 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[1];
idx = (byte)(r3 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(t0 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(t1 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r2 = pT0[t2 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[2];
idx = (byte)(t0 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(t1 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(t2 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
// Last use of the old r3 in this round; it is replaced only now.
r3 = pT0[r3 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[3];
}
// Second round of the pair: same pattern, reading r* and writing t0, t1, t2 and r3.
kw = KW[r++];
fixed (uint* pkw = kw)
{
idx = (byte)(r1 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(r2 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(r3 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
t0 = pT0[r0 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[0];
idx = (byte)(r2 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(r3 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(r0 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
t1 = pT0[r1 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[1];
idx = (byte)(r3 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(r0 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(r1 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
t2 = pT0[r2 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[2];
idx = (byte)(r0 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(r1 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(r2 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r3 = pT0[r3 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[3];
}
}
// One more table-driven round after the loop (t* to r*), so the final round below
// reads its input from r0..r3.
kw = KW[r++];
fixed (uint* pkw = kw)
{
idx = (byte)(t1 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(t2 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(r3 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r0 = pT0[t0 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[0];
idx = (byte)(t2 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(r3 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(t0 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r1 = pT0[t1 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[1];
idx = (byte)(r3 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(t0 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(t1 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r2 = pT0[t2 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[2];
idx = (byte)(t0 >> 8);
tmp1 = (pT0[idx] >> 24) | (pT0[idx] << 8);
idx = (byte)(t1 >> 16);
tmp2 = (pT0[idx] >> 16) | (pT0[idx] << 16);
idx = (byte)(t2 >> 24);
tmp3 = (pT0[idx] >> 8) | (pT0[idx] << 24);
r3 = pT0[r3 & 255] ^ tmp1 ^ tmp2 ^ tmp3 ^ pkw[3];
}
// the final round's table is a simple function of S so we don't use a whole other four tables for it
// r is not incremented here: this is the last round key read.
kw = KW[r];
fixed (byte* pS = S, ps = s)
fixed (uint* pkw = kw)
{
// Same byte-selection pattern as the rounds above, but each selected byte is substituted
// through a byte table and shifted back into its byte position, with no T0 lookup.
// NOTE(review): which lookups go through S and which through s follows a fixed
// interleaved pattern that this file does not explain - keep it as written.
C0 = (uint)pS[(byte)r0] ^ (((uint)pS[(byte)(r1 >> 8)]) << 8) ^ (((uint)ps[(byte)(r2 >> 16)]) << 16) ^ (((uint)ps[(byte)(r3 >> 24)]) << 24) ^ pkw[0];
C1 = (uint)ps[(byte)r1] ^ (((uint)pS[(byte)(r2 >> 8)]) << 8) ^ (((uint)pS[(byte)(r3 >> 16)]) << 16) ^ (((uint)ps[(byte)(r0 >> 24)]) << 24) ^ pkw[1];
C2 = (uint)ps[(byte)r2] ^ (((uint)pS[(byte)(r3 >> 8)]) << 8) ^ (((uint)pS[(byte)(r0 >> 16)]) << 16) ^ (((uint)pS[(byte)(r1 >> 24)]) << 24) ^ pkw[2];
C3 = (uint)ps[(byte)r3] ^ (((uint)ps[(byte)(r0 >> 8)]) << 8) ^ (((uint)ps[(byte)(r1 >> 16)]) << 16) ^ (((uint)pS[(byte)(r2 >> 24)]) << 24) ^ pkw[3];
}
}
// Store the four result words to the output buffer.
Pack.UInt32_To_LE(C0, output, outOff + 0);
Pack.UInt32_To_LE(C1, output, outOff + 4);
Pack.UInt32_To_LE(C2, output, outOff + 8);
Pack.UInt32_To_LE(C3, output, outOff + 12);
}
}
}
#endif