/* Find the difference of two numbers: output = in - output * (note the order of the arguments!) */ static void fdifference(limb *output, limb *input) { for (int i = 0; i < 10; ++i) { output[i] = (input[i] - output[i]); } }
/* Multiply a number by a scalar: output = in * scalar */ static void fscalar_product(limb *output, limb *input, limb scalar) { for (int i = 0; i < 10; ++i) { output[i] = input[i] * scalar; } }
/* Reduce a long form to a short form by taking the input mod 2^255 - 19. */ static void freduce_degree(limb *output) { /* Each of these shifts and adds ends up multiplying the value by 19. */ output[8] += output[18] << 4; output[8] += output[18] << 1; output[8] += output[18]; output[7] += output[17] << 4; output[7] += output[17] << 1; output[7] += output[17]; output[6] += output[16] << 4; output[6] += output[16] << 1; output[6] += output[16]; output[5] += output[15] << 4; output[5] += output[15] << 1; output[5] += output[15]; output[4] += output[14] << 4; output[4] += output[14] << 1; output[4] += output[14]; output[3] += output[13] << 4; output[3] += output[13] << 1; output[3] += output[13]; output[2] += output[12] << 4; output[2] += output[12] << 1; output[2] += output[12]; output[1] += output[11] << 4; output[1] += output[11] << 1; output[1] += output[11]; output[0] += output[10] << 4; output[0] += output[10] << 1; output[0] += output[10]; }
/* Field element representation: * * Field elements are written as an array of signed, 64-bit limbs, least * significant first. The value of the field element is: * x[0] + 2^26·x[1] + x^51·x[2] + 2^102·x[3] + ... * * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */ /* Sum two numbers: output += in */ private static void fsum(limb *output, limb *input) { for (int i = 0; i < 10; i += 2) { output[0 + i] = (output[0 + i] + input[0 + i]); output[1 + i] = (output[1 + i] + input[1 + i]); } }
static void fsquare_inner(limb *output, limb *input) { output[0] = ((limb)((int)input[0])) * ((int)input[0]); output[1] = 2 * ((limb)((int)input[0])) * ((int)input[1]); output[2] = 2 * (((limb)((int)input[1])) * ((int)input[1]) + ((limb)((int)input[0])) * ((int)input[2])); output[3] = 2 * (((limb)((int)input[1])) * ((int)input[2]) + ((limb)((int)input[0])) * ((int)input[3])); output[4] = ((limb)((int)input[2])) * ((int)input[2]) + 4 * ((limb)((int)input[1])) * ((int)input[3]) + 2 * ((limb)((int)input[0])) * ((int)input[4]); output[5] = 2 * (((limb)((int)input[2])) * ((int)input[3]) + ((limb)((int)input[1])) * ((int)input[4]) + ((limb)((int)input[0])) * ((int)input[5])); output[6] = 2 * (((limb)((int)input[3])) * ((int)input[3]) + ((limb)((int)input[2])) * ((int)input[4]) + ((limb)((int)input[0])) * ((int)input[6]) + 2 * ((limb)((int)input[1])) * ((int)input[5])); output[7] = 2 * (((limb)((int)input[3])) * ((int)input[4]) + ((limb)((int)input[2])) * ((int)input[5]) + ((limb)((int)input[1])) * ((int)input[6]) + ((limb)((int)input[0])) * ((int)input[7])); output[8] = ((limb)((int)input[4])) * ((int)input[4]) + 2 * (((limb)((int)input[2])) * ((int)input[6]) + ((limb)((int)input[0])) * ((int)input[8]) + 2 * (((limb)((int)input[1])) * ((int)input[7]) + ((limb)((int)input[3])) * ((int)input[5]))); output[9] = 2 * (((limb)((int)input[4])) * ((int)input[5]) + ((limb)((int)input[3])) * ((int)input[6]) + ((limb)((int)input[2])) * ((int)input[7]) + ((limb)((int)input[1])) * ((int)input[8]) + ((limb)((int)input[0])) * ((int)input[9])); output[10] = 2 * (((limb)((int)input[5])) * ((int)input[5]) + ((limb)((int)input[4])) * ((int)input[6]) + ((limb)((int)input[2])) * ((int)input[8]) + 2 * (((limb)((int)input[3])) * ((int)input[7]) + ((limb)((int)input[1])) * ((int)input[9]))); output[11] = 2 * (((limb)((int)input[5])) * ((int)input[6]) + ((limb)((int)input[4])) * ((int)input[7]) + ((limb)((int)input[3])) * ((int)input[8]) + ((limb)((int)input[2])) * ((int)input[9])); output[12] = ((limb)((int)input[6])) * ((int)input[6]) + 2 * (((limb)((int)input[4])) * ((int)input[8]) + 2 * (((limb)((int)input[5])) * ((int)input[7]) + ((limb)((int)input[3])) * ((int)input[9]))); output[13] = 2 * (((limb)((int)input[6])) * ((int)input[7]) + ((limb)((int)input[5])) * ((int)input[8]) + ((limb)((int)input[4])) * ((int)input[9])); output[14] = 2 * (((limb)((int)input[7])) * ((int)input[7]) + ((limb)((int)input[6])) * ((int)input[8]) + 2 * ((limb)((int)input[5])) * ((int)input[9])); output[15] = 2 * (((limb)((int)input[7])) * ((int)input[8]) + ((limb)((int)input[6])) * ((int)input[9])); output[16] = ((limb)((int)input[8])) * ((int)input[8]) + 4 * ((limb)((int)input[7])) * ((int)input[9]); output[17] = 2 * ((limb)((int)input[8])) * ((int)input[9]); output[18] = 2 * ((limb)((int)input[9])) * ((int)input[9]); }
static void fsquare(limb *output, limb *input) { Long19 t = new Long19(); fsquare_inner(t.Items, input); freduce_degree(t.Items); freduce_coefficients(t.Items); memcpy(output, t.Items, sizeof(limb) * 10); }
/* A helpful wrapper around fproduct: output = in * in2. * * output must be distinct to both inputs. The output is reduced degree and * reduced coefficient. */ static void fmul(limb *output, limb *input, limb *in2) { Long19 t = new Long19(); fproduct(t.Items, input, in2); freduce_degree(t.Items); freduce_coefficients(t.Items); memcpy(output, t.Items, sizeof(limb) * 10); }
/* Input: Q, Q', Q-Q' * Output: 2Q, Q+Q' * * x2 z3: long form * x3 z3: long form * x z: short form, destroyed * xprime zprime: short form, destroyed * qmqp: short form, preserved */ static void fmonty(limb *x2, limb *z2, /* output 2Q */ limb *x3, limb *z3, /* output Q + Q' */ limb *x, limb *z, /* input Q */ limb *xprime, limb *zprime, /* input Q' */ limb *qmqp /* input Q - Q' */) { Long19 origx = new Long19(); Long19 origxprime = new Long19(); Long19 zzz = new Long19(); Long19 xx = new Long19(); Long19 zz = new Long19(); Long19 xxprime = new Long19(); Long19 zzprime = new Long19(); Long19 zzzprime = new Long19(); Long19 xxxprime = new Long19(); memcpy(origx.Items, x, 10 * sizeof(limb)); fsum(x, z); fdifference(z, origx.Items); // does x - z memcpy(origxprime.Items, xprime, sizeof(limb) * 10); fsum(xprime, zprime); fdifference(zprime, origxprime.Items); fproduct(xxprime.Items, xprime, z); fproduct(zzprime.Items, x, zprime); freduce_degree(xxprime.Items); freduce_coefficients(xxprime.Items); freduce_degree(zzprime.Items); freduce_coefficients(zzprime.Items); memcpy(origxprime.Items, xxprime.Items, sizeof(limb) * 10); fsum(xxprime.Items, zzprime.Items); fdifference(zzprime.Items, origxprime.Items); fsquare(xxxprime.Items, xxprime.Items); fsquare(zzzprime.Items, zzprime.Items); fproduct(zzprime.Items, zzzprime.Items, qmqp); freduce_degree(zzprime.Items); freduce_coefficients(zzprime.Items); memcpy(x3, xxxprime.Items, sizeof(limb) * 10); memcpy(z3, zzprime.Items, sizeof(limb) * 10); fsquare(xx.Items, x); fsquare(zz.Items, z); fproduct(x2, xx.Items, zz.Items); freduce_degree(x2); freduce_coefficients(x2); fdifference(zz.Items, xx.Items); // does zz = xx - zz fscalar_product(zzz.Items, zz.Items, 121665); /* No need to call freduce_degree here: * fscalar_product doesn't increase the degree of its input. */ freduce_coefficients(zzz.Items); fsum(zzz.Items, xx.Items); fproduct(z2, zz.Items, zzz.Items); freduce_degree(z2); freduce_coefficients(z2); }
/* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave * them unchanged if 'iswap' is 0. Runs in data-invariant time to avoid * side-channel attacks. * * NOTE that this function requires that 'iswap' be 1 or 0; other values give * wrong results. Also, the two limb arrays must be in reduced-coefficient, * reduced-degree form: the values in a[10..19] or b[10..19] aren't swapped, * and all all values in a[0..9],b[0..9] must have magnitude less than * INT32_MAX. */ static void swap_conditional(limb *a, limb *b, limb iswap) { int swap = (int)-iswap; for (int i = 0; i < 10; ++i) { int x = swap & (((int)a[i]) ^ ((int)b[i])); a[i] = ((int)a[i]) ^ x; b[i] = ((int)b[i]) ^ x; } }
/* Take a little-endian, 32-byte number and expand it into polynomial form */ static void fexpand(limb *output, byte *input) { output[0] = (ReadLittleEndianInt32(input + 0) >> 0) & 0x3ffffff; output[1] = (ReadLittleEndianInt32(input + 3) >> 2) & 0x1ffffff; output[2] = (ReadLittleEndianInt32(input + 6) >> 3) & 0x3ffffff; output[3] = (ReadLittleEndianInt32(input + 9) >> 5) & 0x1ffffff; output[4] = (ReadLittleEndianInt32(input + 12) >> 6) & 0x3ffffff; output[5] = (ReadLittleEndianInt32(input + 16) >> 0) & 0x1ffffff; output[6] = (ReadLittleEndianInt32(input + 19) >> 1) & 0x3ffffff; output[7] = (ReadLittleEndianInt32(input + 22) >> 3) & 0x1ffffff; output[8] = (ReadLittleEndianInt32(input + 25) >> 4) & 0x3ffffff; output[9] = (ReadLittleEndianInt32(input + 28) >> 6) & 0x1ffffff; }
/* Reduce all coefficients of the short form input so that |x| < 2^26. * * On entry: |output[i]| < 2^62 */ static void freduce_coefficients(limb *output) { output[10] = 0; for (int i = 0; i < 10; i += 2) { limb over = div_by_2_26(output[i]); output[i] -= over << 26; output[i + 1] += over; over = div_by_2_25(output[i + 1]); output[i + 1] -= over << 25; output[i + 2] += over; } /* Now |output[10]| < 2 ^ 38 and all other coefficients are reduced. */ output[0] += output[10] << 4; output[0] += output[10] << 1; output[0] += output[10]; output[10] = 0; /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19 * 2^38 * So |over| will be no more than 77825 */ { limb over = div_by_2_26(output[0]); output[0] -= over << 26; output[1] += over; } /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 77825 * So |over| will be no more than 1. */ { /* output[1] fits in 32 bits, so we can use div_int_by_2_25 here. */ int over32 = div_s32_by_2_25((int)output[1]); output[1] -= over32 << 25; output[2] += over32; } /* Finally, output[0,1,3..9] are reduced, and output[2] is "nearly reduced": * we have |output[2]| <= 2^26. This is good enough for all of our math, * but it will require an extra freduce_coefficients before fcontract. */ }
crecip(limb *output, limb *z) { Long19 z2 = new Long19(); Long19 z9 = new Long19(); Long19 z11 = new Long19(); Long19 z2_5_0 = new Long19(); Long19 z2_10_0 = new Long19(); Long19 z2_20_0 = new Long19(); Long19 z2_50_0 = new Long19(); Long19 z2_100_0 = new Long19(); Long19 t0 = new Long19(); Long19 t1 = new Long19(); int i; /* 2 */ fsquare(z2.Items, z); /* 4 */ fsquare(t1.Items, z2.Items); /* 8 */ fsquare(t0.Items, t1.Items); /* 9 */ fmul(z9.Items, t0.Items, z); /* 11 */ fmul(z11.Items, z9.Items, z2.Items); /* 22 */ fsquare(t0.Items, z11.Items); /* 2^5 - 2^0 = 31 */ fmul(z2_5_0.Items, t0.Items, z9.Items); /* 2^6 - 2^1 */ fsquare(t0.Items, z2_5_0.Items); /* 2^7 - 2^2 */ fsquare(t1.Items, t0.Items); /* 2^8 - 2^3 */ fsquare(t0.Items, t1.Items); /* 2^9 - 2^4 */ fsquare(t1.Items, t0.Items); /* 2^10 - 2^5 */ fsquare(t0.Items, t1.Items); /* 2^10 - 2^0 */ fmul(z2_10_0.Items, t0.Items, z2_5_0.Items); /* 2^11 - 2^1 */ fsquare(t0.Items, z2_10_0.Items); /* 2^12 - 2^2 */ fsquare(t1.Items, t0.Items); /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t0.Items, t1.Items); fsquare(t1.Items, t0.Items); } /* 2^20 - 2^0 */ fmul(z2_20_0.Items, t1.Items, z2_10_0.Items); /* 2^21 - 2^1 */ fsquare(t0.Items, z2_20_0.Items); /* 2^22 - 2^2 */ fsquare(t1.Items, t0.Items); /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fsquare(t0.Items, t1.Items); fsquare(t1.Items, t0.Items); } /* 2^40 - 2^0 */ fmul(t0.Items, t1.Items, z2_20_0.Items); /* 2^41 - 2^1 */ fsquare(t1.Items, t0.Items); /* 2^42 - 2^2 */ fsquare(t0.Items, t1.Items); /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t1.Items, t0.Items); fsquare(t0.Items, t1.Items); } /* 2^50 - 2^0 */ fmul(z2_50_0.Items, t0.Items, z2_10_0.Items); /* 2^51 - 2^1 */ fsquare(t0.Items, z2_50_0.Items); /* 2^52 - 2^2 */ fsquare(t1.Items, t0.Items); /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0.Items, t1.Items); fsquare(t1.Items, t0.Items); } /* 2^100 - 2^0 */ fmul(z2_100_0.Items, t1.Items, z2_50_0.Items); /* 2^101 - 2^1 */ fsquare(t1.Items, z2_100_0.Items); /* 2^102 - 2^2 */ fsquare(t0.Items, t1.Items); /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fsquare(t1.Items, t0.Items); fsquare(t0.Items, t1.Items); } /* 2^200 - 2^0 */ fmul(t1.Items, t0.Items, z2_100_0.Items); /* 2^201 - 2^1 */ fsquare(t0.Items, t1.Items); /* 2^202 - 2^2 */ fsquare(t1.Items, t0.Items); /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0.Items, t1.Items); fsquare(t1.Items, t0.Items); } /* 2^250 - 2^0 */ fmul(t0.Items, t1.Items, z2_50_0.Items); /* 2^251 - 2^1 */ fsquare(t1.Items, t0.Items); /* 2^252 - 2^2 */ fsquare(t0.Items, t1.Items); /* 2^253 - 2^3 */ fsquare(t1.Items, t0.Items); /* 2^254 - 2^4 */ fsquare(t0.Items, t1.Items); /* 2^255 - 2^5 */ fsquare(t1.Items, t0.Items); /* 2^255 - 21 */ fmul(output, t1.Items, z11.Items); }
cmult(limb *resultx, limb *resultz, byte *n, limb *q) { Long19 a = new Long19(); Long19 b = new Long19(); Long19 c = new Long19(); Long19 d = new Long19(); b.Items[0] = 1; c.Items[0] = 1; limb *nqpqx = a.Items, nqpqz = b.Items, nqx = c.Items, nqz = d.Items, t; Long19 e = new Long19(); Long19 f = new Long19(); Long19 g = new Long19(); Long19 h = new Long19(); f.Items[0] = 1; h.Items[0] = 1; limb *nqpqx2 = e.Items, nqpqz2 = f.Items, nqx2 = g.Items, nqz2 = h.Items; memcpy(nqpqx, q, sizeof(limb) * 10); for (int i = 0; i < 32; ++i) { byte @byte = n[31 - i]; for (int j = 0; j < 8; ++j) { limb bit = @byte >> 7; swap_conditional(nqx, nqpqx, bit); swap_conditional(nqz, nqpqz, bit); fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q); swap_conditional(nqx2, nqpqx2, bit); swap_conditional(nqz2, nqpqz2, bit); t = nqx; nqx = nqx2; nqx2 = t; t = nqz; nqz = nqz2; nqz2 = t; t = nqpqx; nqpqx = nqpqx2; nqpqx2 = t; t = nqpqz; nqpqz = nqpqz2; nqpqz2 = t; @byte <<= 1; } } memcpy(resultx, nqx, sizeof(limb) * 10); memcpy(resultz, nqz, sizeof(limb) * 10); }
/* Take a fully reduced polynomial form number and contract it into a * little-endian, 32-byte array */ static void fcontract(byte *output, limb *input) { int i; int j; int mask; int carry; for (j = 0; j < 2; ++j) { for (i = 0; i < 9; ++i) { if ((i & 1) == 1) { /* This calculation is a time-invariant way to make input[i] positive * by borrowing from the next-larger limb. */ mask = (int)(input[i]) >> 31; carry = -(((int)(input[i]) & mask) >> 25); input[i] = (int)(input[i]) + (carry << 25); input[i + 1] = (int)(input[i + 1]) - carry; } else { mask = (int)(input[i]) >> 31; carry = -(((int)(input[i]) & mask) >> 26); input[i] = (int)(input[i]) + (carry << 26); input[i + 1] = (int)(input[i + 1]) - carry; } } mask = (int)(input[9]) >> 31; carry = -(((int)(input[9]) & mask) >> 25); input[9] = (int)(input[9]) + (carry << 25); input[0] = (int)(input[0]) - (carry * 19); } /* The first borrow-propagation pass above ended with every limb * except (possibly) input[0] non-negative. * * Since each input limb except input[0] is decreased by at most 1 * by a borrow-propagation pass, the second borrow-propagation pass * could only have wrapped around to decrease input[0] again if the * first pass left input[0] negative *and* input[1] through input[9] * were all zero. In that case, input[1] is now 2^25 - 1, and this * last borrow-propagation step will leave input[1] non-negative. */ mask = (int)(input[0]) >> 31; carry = -(((int)(input[0]) & mask) >> 26); input[0] = (int)(input[0]) + (carry << 26); input[1] = (int)(input[1]) - carry; /* Both passes through the above loop, plus the last 0-to-1 step, are * necessary: if input[9] is -1 and input[0] through input[8] are 0, * negative values will remain in the array until the end. */ input[1] <<= 2; input[2] <<= 3; input[3] <<= 5; input[4] <<= 6; input[6] <<= 1; input[7] <<= 3; input[8] <<= 4; input[9] <<= 6; /* #define F(i, s) \ * output[s+0] |= input[i] & 0xff; \ * output[s+1] = (input[i] >> 8) & 0xff; \ * output[s+2] = (input[i] >> 16) & 0xff; \ * output[s+3] = (input[i] >> 24) & 0xff; * output[0] = 0; * output[16] = 0; * F(0,0); * F(1,3); * F(2,6); * F(3,9); * F(4,12); * F(5,16); * F(6,19); * F(7,22); * F(8,25); * F(9,28); #undef F*/ output[0] = (byte)input[0]; output[1] = (byte)(input[0] >> 8); output[2] = (byte)(input[0] >> 16); output[3] = (byte)(input[0] >> 24 | input[1]); output[4] = (byte)(input[1] >> 8); output[5] = (byte)(input[1] >> 16); output[6] = (byte)(input[1] >> 24 | input[2]); output[7] = (byte)(input[2] >> 8); output[8] = (byte)(input[2] >> 16); output[9] = (byte)(input[2] >> 24 | input[3]); output[10] = (byte)(input[3] >> 8); output[11] = (byte)(input[3] >> 16); output[12] = (byte)(input[3] >> 24 | input[4]); output[13] = (byte)(input[4] >> 8); output[14] = (byte)(input[4] >> 16); output[15] = (byte)(input[4] >> 24); output[16] = (byte)input[5]; output[17] = (byte)(input[5] >> 8); output[18] = (byte)(input[5] >> 16); output[19] = (byte)(input[5] >> 24 | input[6]); output[20] = (byte)(input[6] >> 8); output[21] = (byte)(input[6] >> 16); output[22] = (byte)(input[6] >> 24 | input[7]); output[23] = (byte)(input[7] >> 8); output[24] = (byte)(input[7] >> 16); output[25] = (byte)(input[7] >> 24 | input[8]); output[26] = (byte)(input[8] >> 8); output[27] = (byte)(input[8] >> 16); output[28] = (byte)(input[8] >> 24 | input[9]); output[29] = (byte)(input[9] >> 8); output[30] = (byte)(input[9] >> 16); output[31] = (byte)(input[9] >> 24); }
/* Multiply two numbers: output = in2 * in * * output must be distinct to both inputs. The inputs are reduced coefficient * form, the output is not. */ static void fproduct(limb *output, limb *in2, limb *input) { output[0] = ((limb)((int)in2[0])) * ((int)input[0]); output[1] = ((limb)((int)in2[0])) * ((int)input[1]) + ((limb)((int)in2[1])) * ((int)input[0]); output[2] = 2 * ((limb)((int)in2[1])) * ((int)input[1]) + ((limb)((int)in2[0])) * ((int)input[2]) + ((limb)((int)in2[2])) * ((int)input[0]); output[3] = ((limb)((int)in2[1])) * ((int)input[2]) + ((limb)((int)in2[2])) * ((int)input[1]) + ((limb)((int)in2[0])) * ((int)input[3]) + ((limb)((int)in2[3])) * ((int)input[0]); output[4] = ((limb)((int)in2[2])) * ((int)input[2]) + 2 * (((limb)((int)in2[1])) * ((int)input[3]) + ((limb)((int)in2[3])) * ((int)input[1])) + ((limb)((int)in2[0])) * ((int)input[4]) + ((limb)((int)in2[4])) * ((int)input[0]); output[5] = ((limb)((int)in2[2])) * ((int)input[3]) + ((limb)((int)in2[3])) * ((int)input[2]) + ((limb)((int)in2[1])) * ((int)input[4]) + ((limb)((int)in2[4])) * ((int)input[1]) + ((limb)((int)in2[0])) * ((int)input[5]) + ((limb)((int)in2[5])) * ((int)input[0]); output[6] = 2 * (((limb)((int)in2[3])) * ((int)input[3]) + ((limb)((int)in2[1])) * ((int)input[5]) + ((limb)((int)in2[5])) * ((int)input[1])) + ((limb)((int)in2[2])) * ((int)input[4]) + ((limb)((int)in2[4])) * ((int)input[2]) + ((limb)((int)in2[0])) * ((int)input[6]) + ((limb)((int)in2[6])) * ((int)input[0]); output[7] = ((limb)((int)in2[3])) * ((int)input[4]) + ((limb)((int)in2[4])) * ((int)input[3]) + ((limb)((int)in2[2])) * ((int)input[5]) + ((limb)((int)in2[5])) * ((int)input[2]) + ((limb)((int)in2[1])) * ((int)input[6]) + ((limb)((int)in2[6])) * ((int)input[1]) + ((limb)((int)in2[0])) * ((int)input[7]) + ((limb)((int)in2[7])) * ((int)input[0]); output[8] = ((limb)((int)in2[4])) * ((int)input[4]) + 2 * (((limb)((int)in2[3])) * ((int)input[5]) + ((limb)((int)in2[5])) * ((int)input[3]) + ((limb)((int)in2[1])) * ((int)input[7]) + ((limb)((int)in2[7])) * ((int)input[1])) + ((limb)((int)in2[2])) * ((int)input[6]) + ((limb)((int)in2[6])) * ((int)input[2]) + ((limb)((int)in2[0])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[0]); output[9] = ((limb)((int)in2[4])) * ((int)input[5]) + ((limb)((int)in2[5])) * ((int)input[4]) + ((limb)((int)in2[3])) * ((int)input[6]) + ((limb)((int)in2[6])) * ((int)input[3]) + ((limb)((int)in2[2])) * ((int)input[7]) + ((limb)((int)in2[7])) * ((int)input[2]) + ((limb)((int)in2[1])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[1]) + ((limb)((int)in2[0])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[0]); output[10] = 2 * (((limb)((int)in2[5])) * ((int)input[5]) + ((limb)((int)in2[3])) * ((int)input[7]) + ((limb)((int)in2[7])) * ((int)input[3]) + ((limb)((int)in2[1])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[1])) + ((limb)((int)in2[4])) * ((int)input[6]) + ((limb)((int)in2[6])) * ((int)input[4]) + ((limb)((int)in2[2])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[2]); output[11] = ((limb)((int)in2[5])) * ((int)input[6]) + ((limb)((int)in2[6])) * ((int)input[5]) + ((limb)((int)in2[4])) * ((int)input[7]) + ((limb)((int)in2[7])) * ((int)input[4]) + ((limb)((int)in2[3])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[3]) + ((limb)((int)in2[2])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[2]); output[12] = ((limb)((int)in2[6])) * ((int)input[6]) + 2 * (((limb)((int)in2[5])) * ((int)input[7]) + ((limb)((int)in2[7])) * ((int)input[5]) + ((limb)((int)in2[3])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[3])) + ((limb)((int)in2[4])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[4]); output[13] = ((limb)((int)in2[6])) * ((int)input[7]) + ((limb)((int)in2[7])) * ((int)input[6]) + ((limb)((int)in2[5])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[5]) + ((limb)((int)in2[4])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[4]); output[14] = 2 * (((limb)((int)in2[7])) * ((int)input[7]) + ((limb)((int)in2[5])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[5])) + ((limb)((int)in2[6])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[6]); output[15] = ((limb)((int)in2[7])) * ((int)input[8]) + ((limb)((int)in2[8])) * ((int)input[7]) + ((limb)((int)in2[6])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[6]); output[16] = ((limb)((int)in2[8])) * ((int)input[8]) + 2 * (((limb)((int)in2[7])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[7])); output[17] = ((limb)((int)in2[8])) * ((int)input[9]) + ((limb)((int)in2[9])) * ((int)input[8]); output[18] = 2 * ((limb)((int)in2[9])) * ((int)input[9]); }