/// <remarks>
/// Multiply two numbers held in ten-limb Long10 form. The output is in
/// reduced form, the inputs need not be.
/// Limbs are masked alternately to 26 and 25 bits; products that wrap past
/// limb 9 are folded back in with the factors 19 and 38.
/// All limbs of X and Y are copied into locals before XY is written, so XY
/// may be the same object as either operand.
/// </remarks>
/// <returns>XY, to allow call chaining.</returns>
private static Long10 Mul(Long10 XY, Long10 X, Long10 Y)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    // sahn0: local copies avoid repeated class access,
    // which seems to improve performance a bit.
    long x0 = X.N0, x1 = X.N1, x2 = X.N2, x3 = X.N3, x4 = X.N4,
         x5 = X.N5, x6 = X.N6, x7 = X.N7, x8 = X.N8, x9 = X.N9;
    long y0 = Y.N0, y1 = Y.N1, y2 = Y.N2, y3 = Y.N3, y4 = Y.N4,
         y5 = Y.N5, y6 = Y.N6, y7 = Y.N7, y8 = Y.N8, y9 = Y.N9;

    // The carry chain starts at limb 8, runs 9, 0, 1 ... 7, and is closed
    // by adding the final carry back into limbs 8 and 9.
    long acc = x0 * y8 + x2 * y6 + x4 * y4 + x6 * y2 + x8 * y0
             + 2 * (x1 * y7 + x3 * y5 + x5 * y3 + x7 * y1)
             + 38 * (x9 * y9);
    XY.N8 = acc & Mask26;

    acc = (acc >> 26)
        + x0 * y9 + x1 * y8 + x2 * y7 + x3 * y6 + x4 * y5
        + x5 * y4 + x6 * y3 + x7 * y2 + x8 * y1 + x9 * y0;
    XY.N9 = acc & Mask25;

    acc = x0 * y0
        + 19 * ((acc >> 25) + x2 * y8 + x4 * y6 + x6 * y4 + x8 * y2)
        + 38 * (x1 * y9 + x3 * y7 + x5 * y5 + x7 * y3 + x9 * y1);
    XY.N0 = acc & Mask26;

    acc = (acc >> 26)
        + x0 * y1 + x1 * y0
        + 19 * (x2 * y9 + x3 * y8 + x4 * y7 + x5 * y6
              + x6 * y5 + x7 * y4 + x8 * y3 + x9 * y2);
    XY.N1 = acc & Mask25;

    acc = (acc >> 25)
        + x0 * y2 + x2 * y0
        + 19 * (x4 * y8 + x6 * y6 + x8 * y4)
        + 2 * (x1 * y1)
        + 38 * (x3 * y9 + x5 * y7 + x7 * y5 + x9 * y3);
    XY.N2 = acc & Mask26;

    acc = (acc >> 26)
        + x0 * y3 + x1 * y2 + x2 * y1 + x3 * y0
        + 19 * (x4 * y9 + x5 * y8 + x6 * y7 + x7 * y6 + x8 * y5 + x9 * y4);
    XY.N3 = acc & Mask25;

    acc = (acc >> 25)
        + x0 * y4 + x2 * y2 + x4 * y0
        + 19 * (x6 * y8 + x8 * y6)
        + 2 * (x1 * y3 + x3 * y1)
        + 38 * (x5 * y9 + x7 * y7 + x9 * y5);
    XY.N4 = acc & Mask26;

    acc = (acc >> 26)
        + x0 * y5 + x1 * y4 + x2 * y3 + x3 * y2 + x4 * y1 + x5 * y0
        + 19 * (x6 * y9 + x7 * y8 + x8 * y7 + x9 * y6);
    XY.N5 = acc & Mask25;

    acc = (acc >> 25)
        + x0 * y6 + x2 * y4 + x4 * y2 + x6 * y0
        + 19 * (x8 * y8)
        + 2 * (x1 * y5 + x3 * y3 + x5 * y1)
        + 38 * (x7 * y9 + x9 * y7);
    XY.N6 = acc & Mask26;

    acc = (acc >> 26)
        + x0 * y7 + x1 * y6 + x2 * y5 + x3 * y4
        + x4 * y3 + x5 * y2 + x6 * y1 + x7 * y0
        + 19 * (x8 * y9 + x9 * y8);
    XY.N7 = acc & Mask25;

    // Close the chain: the carry out of limb 7 goes back into limbs 8 and 9.
    acc = (acc >> 25) + XY.N8;
    XY.N8 = acc & Mask26;
    XY.N9 += acc >> 26;

    return XY;
}
/// <summary>
/// Multiply a number by a small integer in range -185861411 .. 185861411.
/// The output is in reduced form, the input x need not be. x and xy may point
/// to the same buffer (each limb of x is read before that limb of xy is written).
/// </summary>
private static void MulSmall(Long10 xy, Long10 x, long y)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    // Carry chain starts at limb 8 and runs 9, 0, 1 ... 7.
    long acc = x._8 * y;
    xy._8 = acc & Mask26;

    acc = (acc >> 26) + x._9 * y;
    xy._9 = acc & Mask25;

    // The carry out of limb 9 wraps to limb 0 scaled by 19.
    acc = 19 * (acc >> 25) + x._0 * y;
    xy._0 = acc & Mask26;

    acc = (acc >> 26) + x._1 * y;
    xy._1 = acc & Mask25;

    acc = (acc >> 25) + x._2 * y;
    xy._2 = acc & Mask26;

    acc = (acc >> 26) + x._3 * y;
    xy._3 = acc & Mask25;

    acc = (acc >> 25) + x._4 * y;
    xy._4 = acc & Mask26;

    acc = (acc >> 26) + x._5 * y;
    xy._5 = acc & Mask25;

    acc = (acc >> 25) + x._6 * y;
    xy._6 = acc & Mask26;

    acc = (acc >> 26) + x._7 * y;
    xy._7 = acc & Mask25;

    // Fold the last carry into the already-written limbs 8 and 9.
    acc = (acc >> 25) + xy._8;
    xy._8 = acc & Mask26;
    xy._9 += acc >> 26;
}
/// <summary>
/// Multiply a number by a small integer in range -185861411 .. 185861411.
/// The output is in reduced form, the input x need not be. x and xy may point
/// to the same buffer (each limb of x is read before that limb of xy is written).
/// </summary>
private void MulSmall(Long10 xy, Long10 x, long y)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    // Carry chain starts at limb 8 and runs 9, 0, 1 ... 7.
    long carry = x.N8 * y;
    xy.N8 = carry & Mask26;

    carry = (carry >> 26) + x.N9 * y;
    xy.N9 = carry & Mask25;

    // The carry out of limb 9 wraps to limb 0 scaled by 19.
    carry = 19 * (carry >> 25) + x.N0 * y;
    xy.N0 = carry & Mask26;

    carry = (carry >> 26) + x.N1 * y;
    xy.N1 = carry & Mask25;

    carry = (carry >> 25) + x.N2 * y;
    xy.N2 = carry & Mask26;

    carry = (carry >> 26) + x.N3 * y;
    xy.N3 = carry & Mask25;

    carry = (carry >> 25) + x.N4 * y;
    xy.N4 = carry & Mask26;

    carry = (carry >> 26) + x.N5 * y;
    xy.N5 = carry & Mask25;

    carry = (carry >> 25) + x.N6 * y;
    xy.N6 = carry & Mask26;

    carry = (carry >> 26) + x.N7 * y;
    xy.N7 = carry & Mask25;

    // Fold the last carry into the already-written limbs 8 and 9.
    carry = (carry >> 25) + xy.N8;
    xy.N8 = carry & Mask26;
    xy.N9 += carry >> 26;
}
/// <remarks>
/// Square a number. Optimised form of multiplying X by itself: symmetric
/// cross products are computed once and doubled.
/// X is copied into locals before X2 is written, so X2 may be the same object as X.
/// </remarks>
/// <returns>X2, to allow call chaining.</returns>
private static Long10 Sqr(Long10 X2, Long10 X)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    long x0 = X.N0, x1 = X.N1, x2 = X.N2, x3 = X.N3, x4 = X.N4,
         x5 = X.N5, x6 = X.N6, x7 = X.N7, x8 = X.N8, x9 = X.N9;

    // Carry chain starts at limb 8 and runs 9, 0, 1 ... 7.
    long acc = x4 * x4
             + 2 * (x0 * x8 + x2 * x6)
             + 38 * (x9 * x9)
             + 4 * (x1 * x7 + x3 * x5);
    X2.N8 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x9 + x1 * x8 + x2 * x7 + x3 * x6 + x4 * x5);
    X2.N9 = acc & Mask25;

    acc = 19 * (acc >> 25)
        + x0 * x0
        + 38 * (x2 * x8 + x4 * x6 + x5 * x5)
        + 76 * (x1 * x9 + x3 * x7);
    X2.N0 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x1)
        + 38 * (x2 * x9 + x3 * x8 + x4 * x7 + x5 * x6);
    X2.N1 = acc & Mask25;

    acc = (acc >> 25)
        + 19 * (x6 * x6)
        + 2 * (x0 * x2 + x1 * x1)
        + 38 * (x4 * x8)
        + 76 * (x3 * x9 + x5 * x7);
    X2.N2 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x3 + x1 * x2)
        + 38 * (x4 * x9 + x5 * x8 + x6 * x7);
    X2.N3 = acc & Mask25;

    acc = (acc >> 25)
        + x2 * x2
        + 2 * (x0 * x4)
        + 38 * (x6 * x8 + x7 * x7)
        + 4 * (x1 * x3)
        + 76 * (x5 * x9);
    X2.N4 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x5 + x1 * x4 + x2 * x3)
        + 38 * (x6 * x9 + x7 * x8);
    X2.N5 = acc & Mask25;

    acc = (acc >> 25)
        + 19 * (x8 * x8)
        + 2 * (x0 * x6 + x2 * x4 + x3 * x3)
        + 4 * (x1 * x5)
        + 76 * (x7 * x9);
    X2.N6 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x7 + x1 * x6 + x2 * x5 + x3 * x4)
        + 38 * (x8 * x9);
    X2.N7 = acc & Mask25;

    // Fold the last carry into the already-written limbs 8 and 9.
    acc = (acc >> 25) + X2.N8;
    X2.N8 = acc & Mask26;
    X2.N9 += acc >> 26;

    return X2;
}
/* Multiply a number by a small integer in range -185861411 .. 185861411.
 * The output is in reduced form, the input x need not be. x and xy may point
 * to the same buffer (each limb of x is read before that limb of xy is
 * written). Returns xy. */
private static Long10 mul_small(Long10 xy, Long10 x, long y)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    /* carry chain starts at limb 8 and runs 9, 0, 1 ... 7 */
    long acc = x._8 * y;
    xy._8 = acc & Mask26;

    acc = (acc >> 26) + x._9 * y;
    xy._9 = acc & Mask25;

    /* the carry out of limb 9 wraps to limb 0 scaled by 19 */
    acc = 19 * (acc >> 25) + x._0 * y;
    xy._0 = acc & Mask26;

    acc = (acc >> 26) + x._1 * y;
    xy._1 = acc & Mask25;

    acc = (acc >> 25) + x._2 * y;
    xy._2 = acc & Mask26;

    acc = (acc >> 26) + x._3 * y;
    xy._3 = acc & Mask25;

    acc = (acc >> 25) + x._4 * y;
    xy._4 = acc & Mask26;

    acc = (acc >> 26) + x._5 * y;
    xy._5 = acc & Mask25;

    acc = (acc >> 25) + x._6 * y;
    xy._6 = acc & Mask26;

    acc = (acc >> 26) + x._7 * y;
    xy._7 = acc & Mask25;

    /* fold the last carry into the already-written limbs 8 and 9 */
    acc = (acc >> 25) + xy._8;
    xy._8 = acc & Mask26;
    xy._9 += acc >> 26;

    return xy;
}
/// <remarks>
/// Multiply a number by a small integer in range -185861411 .. 185861411.
/// The output is in reduced form, the input X need not be. X and XY may point
/// to the same buffer (each limb of X is read before that limb of XY is written).
/// </remarks>
/// <returns>XY, to allow call chaining.</returns>
private static Long10 MulSmall(Long10 XY, Long10 X, long Y)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    // Carry chain starts at limb 8 and runs 9, 0, 1 ... 7.
    long carry = X.N8 * Y;
    XY.N8 = carry & Mask26;

    carry = (carry >> 26) + X.N9 * Y;
    XY.N9 = carry & Mask25;

    // The carry out of limb 9 wraps to limb 0 scaled by 19.
    carry = 19 * (carry >> 25) + X.N0 * Y;
    XY.N0 = carry & Mask26;

    carry = (carry >> 26) + X.N1 * Y;
    XY.N1 = carry & Mask25;

    carry = (carry >> 25) + X.N2 * Y;
    XY.N2 = carry & Mask26;

    carry = (carry >> 26) + X.N3 * Y;
    XY.N3 = carry & Mask25;

    carry = (carry >> 25) + X.N4 * Y;
    XY.N4 = carry & Mask26;

    carry = (carry >> 26) + X.N5 * Y;
    XY.N5 = carry & Mask25;

    carry = (carry >> 25) + X.N6 * Y;
    XY.N6 = carry & Mask26;

    carry = (carry >> 26) + X.N7 * Y;
    XY.N7 = carry & Mask25;

    // Fold the last carry into the already-written limbs 8 and 9.
    carry = (carry >> 25) + XY.N8;
    XY.N8 = carry & Mask26;
    XY.N9 += carry >> 26;

    return XY;
}
/// <summary>
/// Square a number. Optimization of Multiply(xsqr, x, x): symmetric cross
/// products are computed once and doubled.
/// x is copied into locals before xsqr is written, so both may be the same object.
/// </summary>
static void Square(Long10 xsqr, Long10 x)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    long x0 = x.N0, x1 = x.N1, x2 = x.N2, x3 = x.N3, x4 = x.N4;
    long x5 = x.N5, x6 = x.N6, x7 = x.N7, x8 = x.N8, x9 = x.N9;

    // Carry chain starts at limb 8 and runs 9, 0, 1 ... 7.
    long carry = x4 * x4
               + 2 * (x0 * x8 + x2 * x6)
               + 38 * (x9 * x9)
               + 4 * (x1 * x7 + x3 * x5);
    xsqr.N8 = carry & Mask26;

    carry = (carry >> 26)
          + 2 * (x0 * x9 + x1 * x8 + x2 * x7 + x3 * x6 + x4 * x5);
    xsqr.N9 = carry & Mask25;

    carry = 19 * (carry >> 25)
          + x0 * x0
          + 38 * (x2 * x8 + x4 * x6 + x5 * x5)
          + 76 * (x1 * x9 + x3 * x7);
    xsqr.N0 = carry & Mask26;

    carry = (carry >> 26)
          + 2 * (x0 * x1)
          + 38 * (x2 * x9 + x3 * x8 + x4 * x7 + x5 * x6);
    xsqr.N1 = carry & Mask25;

    carry = (carry >> 25)
          + 19 * (x6 * x6)
          + 2 * (x0 * x2 + x1 * x1)
          + 38 * (x4 * x8)
          + 76 * (x3 * x9 + x5 * x7);
    xsqr.N2 = carry & Mask26;

    carry = (carry >> 26)
          + 2 * (x0 * x3 + x1 * x2)
          + 38 * (x4 * x9 + x5 * x8 + x6 * x7);
    xsqr.N3 = carry & Mask25;

    carry = (carry >> 25)
          + x2 * x2
          + 2 * (x0 * x4)
          + 38 * (x6 * x8 + x7 * x7)
          + 4 * (x1 * x3)
          + 76 * (x5 * x9);
    xsqr.N4 = carry & Mask26;

    carry = (carry >> 26)
          + 2 * (x0 * x5 + x1 * x4 + x2 * x3)
          + 38 * (x6 * x9 + x7 * x8);
    xsqr.N5 = carry & Mask25;

    carry = (carry >> 25)
          + 19 * (x8 * x8)
          + 2 * (x0 * x6 + x2 * x4 + x3 * x3)
          + 4 * (x1 * x5)
          + 76 * (x7 * x9);
    xsqr.N6 = carry & Mask26;

    carry = (carry >> 26)
          + 2 * (x0 * x7 + x1 * x6 + x2 * x5 + x3 * x4)
          + 38 * (x8 * x9);
    xsqr.N7 = carry & Mask25;

    // Fold the last carry into the already-written limbs 8 and 9.
    carry = (carry >> 25) + xsqr.N8;
    xsqr.N8 = carry & Mask26;
    xsqr.N9 += carry >> 26;
}
/// <summary>
/// Convert a Long10 value to a freshly allocated little-endian byte array of
/// KeySize bytes (the loop below writes indices 0 .. 31).
/// </summary>
/// <param name="value">Number to serialise; IsOverflow is applied to it, so it is
/// presumably expected in reduced form - confirm against callers.</param>
/// <returns>The packed bytes.</returns>
private static byte[] Pack(Long10 value)
{
    var data = new byte[KeySize];

    // ld is +1 when IsOverflow reports an overflow, -1 when the top limb is
    // negative, 0 otherwise; it is applied as 19*ld at the bottom and
    // -(P25 + 1)*ld at the top limb.
    int ld = (IsOverflow(value) ? 1 : 0) - ((value.N9 < 0) ? 1 : 0);
    int ud = ld * -(P25 + 1);
    ld *= 19;

    // Contribution of the limbs to each successive 32-bit output word; the
    // part above bit 31 is carried into the next word.
    long[] words =
    {
        ld + value.N0 + (value.N1 << 26),
        value.N2 << 19,
        value.N3 << 13,
        value.N4 << 6,
        value.N5 + (value.N6 << 25),
        value.N7 << 19,
        value.N8 << 12,
        (value.N9 + ud) << 6
    };

    // The byte casts deliberately truncate; 'unchecked' keeps that true even
    // when the assembly is compiled with checked arithmetic, where a plain
    // narrowing cast would throw OverflowException.
    unchecked
    {
        long t = 0;
        for (int w = 0; w < words.Length; w++)
        {
            t = (t >> 32) + words[w];
            int offset = 4 * w;
            data[offset] = (byte)t;
            data[offset + 1] = (byte)(t >> 8);
            data[offset + 2] = (byte)(t >> 16);
            data[offset + 3] = (byte)(t >> 24);
        }
    }

    return data;
}
/* Check if reduced-form input >= 2^255-19.
 * True when limb 9 exceeds P25, or when limb 0 exceeds P26 - 19 while the
 * bitwise AND of the odd limbs equals P25 and that of limbs 2, 4, 6, 8
 * equals P26. */
private static bool is_overflow(Long10 x)
{
    if (x._9 > P25)
    {
        return true;
    }

    bool lowLimbAbove = x._0 > P26 - 19;
    bool oddLimbsAtP25 = (x._1 & x._3 & x._5 & x._7 & x._9) == P25;
    bool evenLimbsAtP26 = (x._2 & x._4 & x._6 & x._8) == P26;

    return lowLimbAbove && oddLimbsAtP25 && evenLimbsAtP26;
}
/// <summary>
/// Evaluate the curve equation Y^2 = X^3 + 486662 X^2 + X, computed as
/// (X^2 + 486662 X + 1) * X.
/// t is a temporary and is overwritten; the result is left in y2.
/// </summary>
private static void x_to_y2(Long10 t, Long10 y2, Long10 x)
{
    Sqr(t, x);                  // t  = x^2
    MulSmall(y2, x, 486662);    // y2 = 486662 x
    Add(t, t, y2);              // t  = x^2 + 486662 x
    t._0 += 1;                  // t  = x^2 + 486662 x + 1
    Mul(y2, t, x);              // y2 = t * x
}
/// <remarks>
/// Evaluate the curve equation Y^2 = X^3 + 486662 X^2 + X, computed as
/// (X^2 + 486662 X + 1) * X.
/// T is a temporary and is overwritten; the result is left in Y2.
/// </remarks>
private static void XtoY2(Long10 T, Long10 Y2, Long10 X)
{
    Sqr(T, X);                  // T  = X^2
    MulSmall(Y2, X, 486662);    // Y2 = 486662 X
    AddXY(T, T, Y2);            // T  = X^2 + 486662 X
    T.N0 += 1;                  // T  = X^2 + 486662 X + 1
    Mul(Y2, T, X);              // Y2 = T * X
}
/// <remarks>
/// Limb-wise subtraction: XY = X - Y. No carry propagation or reduction is
/// performed. Each output limb depends only on the matching input limbs, so
/// XY may be the same object as X or Y.
/// </remarks>
private static void Sub(Long10 XY, Long10 X, Long10 Y)
{
    XY.N0 = X.N0 - Y.N0;
    XY.N1 = X.N1 - Y.N1;
    XY.N2 = X.N2 - Y.N2;
    XY.N3 = X.N3 - Y.N3;
    XY.N4 = X.N4 - Y.N4;
    XY.N5 = X.N5 - Y.N5;
    XY.N6 = X.N6 - Y.N6;
    XY.N7 = X.N7 - Y.N7;
    XY.N8 = X.N8 - Y.N8;
    XY.N9 = X.N9 - Y.N9;
}
/// <remarks>
/// Limb-wise addition: XY = X + Y.
/// Add/subtract contract: the inputs must be in reduced form, and the output
/// isn't, so to do another addition or subtraction on the output, first
/// multiply it by one to reduce it.
/// Each output limb depends only on the matching input limbs, so XY may be
/// the same object as X or Y.
/// </remarks>
private static void AddXY(Long10 XY, Long10 X, Long10 Y)
{
    XY.N0 = X.N0 + Y.N0;
    XY.N1 = X.N1 + Y.N1;
    XY.N2 = X.N2 + Y.N2;
    XY.N3 = X.N3 + Y.N3;
    XY.N4 = X.N4 + Y.N4;
    XY.N5 = X.N5 + Y.N5;
    XY.N6 = X.N6 + Y.N6;
    XY.N7 = X.N7 + Y.N7;
    XY.N8 = X.N8 + Y.N8;
    XY.N9 = X.N9 + Y.N9;
}
/// <remarks>
/// Set a number to a small value, which must be in range
/// -185861411 .. 185861411 (not checked here). The value goes into limb 0
/// and all other limbs are cleared.
/// </remarks>
private static void Set(Long10 NumOut, int NumIn)
{
    NumOut.N0 = NumIn;

    // Clear the remaining nine limbs.
    NumOut.N1 = 0;
    NumOut.N2 = 0;
    NumOut.N3 = 0;
    NumOut.N4 = 0;
    NumOut.N5 = 0;
    NumOut.N6 = 0;
    NumOut.N7 = 0;
    NumOut.N8 = 0;
    NumOut.N9 = 0;
}
/// <remarks>
/// Copy a number: all ten limbs of NumIn are assigned to NumOut.
/// </remarks>
private static void Copy(Long10 NumOut, Long10 NumIn)
{
    NumOut.N0 = NumIn.N0;
    NumOut.N1 = NumIn.N1;
    NumOut.N2 = NumIn.N2;
    NumOut.N3 = NumIn.N3;
    NumOut.N4 = NumIn.N4;
    NumOut.N5 = NumIn.N5;
    NumOut.N6 = NumIn.N6;
    NumOut.N7 = NumIn.N7;
    NumOut.N8 = NumIn.N8;
    NumOut.N9 = NumIn.N9;
}
/// <summary>
/// Evaluate the curve equation Y^2 = X^3 + 486662 X^2 + X, computed as
/// (X^2 + 486662 X + 1) * X.
/// </summary>
/// <param name="y2">output</param>
/// <param name="x">X</param>
/// <param name="temp">temporary; overwritten</param>
private void CurveEquationInline(Long10 y2, Long10 x, Long10 temp)
{
    Square(temp, x);            // temp = x^2
    MulSmall(y2, x, 486662);    // y2   = 486662 x
    Add(temp, temp, y2);        // temp = x^2 + 486662 x
    temp.N0 += 1;               // temp = x^2 + 486662 x + 1
    Multiply(y2, temp, x);      // y2   = temp * x
}
/* Limb-wise addition: xy = x + y.
 * Add/subtract contract: the inputs must be in reduced form, and the output
 * isn't, so to do another addition or subtraction on the output, first
 * multiply it by one to reduce it.
 * Each output limb depends only on the matching input limbs, so xy may be
 * the same object as x or y. */
private static void add(Long10 xy, Long10 x, Long10 y)
{
    xy._0 = x._0 + y._0;
    xy._1 = x._1 + y._1;
    xy._2 = x._2 + y._2;
    xy._3 = x._3 + y._3;
    xy._4 = x._4 + y._4;
    xy._5 = x._5 + y._5;
    xy._6 = x._6 + y._6;
    xy._7 = x._7 + y._7;
    xy._8 = x._8 + y._8;
    xy._9 = x._9 + y._9;
}
/// <summary>
/// Check if reduced-form input >= 2^255-19.
/// True when limb 9 exceeds P25, or when limb 0 exceeds P26 - 19 while the
/// bitwise AND of the odd limbs equals P25 and that of limbs 2, 4, 6, 8
/// equals P26.
/// </summary>
private bool IsOverflow(Long10 x)
{
    bool lowLimbAbove = x.N0 > P26 - 19;
    bool oddLimbsAtP25 = (x.N1 & x.N3 & x.N5 & x.N7 & x.N9) == P25;
    bool evenLimbsAtP26 = (x.N2 & x.N4 & x.N6 & x.N8) == P26;
    bool topLimbAbove = x.N9 > P25;

    return (lowLimbAbove & oddLimbsAtP25 & evenLimbsAtP26) || topLimbAbove;
}
/// <summary>
/// Overflow test for a Long10 value: true when limb 9 exceeds P25, or when
/// limb 0 exceeds P26 - 19 while the bitwise AND of the odd limbs equals P25
/// and that of limbs 2, 4, 6, 8 equals P26.
/// </summary>
private static bool IsOverflow(Long10 value)
{
    if (value.N9 > P25)
    {
        return true;
    }

    bool lowLimbAbove = value.N0 > P26 - 19;
    bool oddLimbsAtP25 = (value.N1 & value.N3 & value.N5 & value.N7 & value.N9) == P25;
    bool evenLimbsAtP26 = (value.N2 & value.N4 & value.N6 & value.N8) == P26;

    return lowLimbAbove & oddLimbsAtP25 & evenLimbsAtP26;
}
/* Limb-wise subtraction: xy = x - y. No carry propagation or reduction is
 * performed. Each output limb depends only on the matching input limbs, so
 * xy may be the same object as x or y. */
private static void sub(Long10 xy, Long10 x, Long10 y)
{
    xy._0 = x._0 - y._0;
    xy._1 = x._1 - y._1;
    xy._2 = x._2 - y._2;
    xy._3 = x._3 - y._3;
    xy._4 = x._4 - y._4;
    xy._5 = x._5 - y._5;
    xy._6 = x._6 - y._6;
    xy._7 = x._7 - y._7;
    xy._8 = x._8 - y._8;
    xy._9 = x._9 - y._9;
}
/* Evaluate the curve equation Y^2 = X^3 + 486662 X^2 + X, computed as
 * (X^2 + 486662 X + 1) * X.
 * t is a temporary and is overwritten; the result is left in y2. */
private static void x_to_y2(Long10 t, Long10 y2, Long10 x)
{
    sqr(t, x);                  /* t  = x^2 */
    mul_small(y2, x, 486662);   /* y2 = 486662 x */
    add(t, t, y2);              /* t  = x^2 + 486662 x */
    t._0 += 1;                  /* t  = x^2 + 486662 x + 1 */
    mul(y2, t, x);              /* y2 = t * x */
}
/* Copy a number: all ten limbs of inVal are assigned to outVal. */
public static void cpy(Long10 outVal, Long10 inVal)
{
    outVal._0 = inVal._0;
    outVal._1 = inVal._1;
    outVal._2 = inVal._2;
    outVal._3 = inVal._3;
    outVal._4 = inVal._4;
    outVal._5 = inVal._5;
    outVal._6 = inVal._6;
    outVal._7 = inVal._7;
    outVal._8 = inVal._8;
    outVal._9 = inVal._9;
}
/// <remarks>
/// Convert from internal format to little-endian byte format.
/// The number must be in a reduced form which is output by the following ops:
///     unpack, mul, sqr
///     set -- if input in range 0 .. P25
/// If you're unsure if the number is reduced, first multiply it by 1.
/// Writes M[0] .. M[31]; M must therefore hold at least 32 bytes.
/// </remarks>
private static void Pack(Long10 X, byte[] M)
{
    // ld is +1 when IsOverflow reports an overflow, -1 when the top limb is
    // negative, 0 otherwise; it is applied as 19*ld at the bottom and
    // -(P25 + 1)*ld at the top limb.
    int ld = (IsOverflow(X) ? 1 : 0) - ((X.N9 < 0) ? 1 : 0);
    int ud = ld * -(P25 + 1);
    ld *= 19;

    // Contribution of the limbs to each successive 32-bit output word; the
    // part above bit 31 is carried into the next word.
    long[] words =
    {
        ld + X.N0 + (X.N1 << 26),
        X.N2 << 19,
        X.N3 << 13,
        X.N4 << 6,
        X.N5 + (X.N6 << 25),
        X.N7 << 19,
        X.N8 << 12,
        (X.N9 + ud) << 6
    };

    // The byte casts deliberately truncate; 'unchecked' keeps that true even
    // when the assembly is compiled with checked arithmetic, where a plain
    // narrowing cast would throw OverflowException.
    unchecked
    {
        long t = 0;
        for (int w = 0; w < words.Length; w++)
        {
            t = (t >> 32) + words[w];
            int offset = 4 * w;
            M[offset] = (byte)t;
            M[offset + 1] = (byte)(t >> 8);
            M[offset + 2] = (byte)(t >> 16);
            M[offset + 3] = (byte)(t >> 24);
        }
    }
}
/* Set a number to a small value, which must be in range
 * -185861411 .. 185861411 (not checked here). The value goes into limb 0
 * and all other limbs are cleared. */
private static void set(Long10 outVal, int inVal)
{
    outVal._0 = inVal;

    /* clear the remaining nine limbs */
    outVal._1 = 0;
    outVal._2 = 0;
    outVal._3 = 0;
    outVal._4 = 0;
    outVal._5 = 0;
    outVal._6 = 0;
    outVal._7 = 0;
    outVal._8 = 0;
    outVal._9 = 0;
}
/* Montgomery doubling step.
 * B = 2 * Q where
 *   X(B) = bx/bz
 *   X(Q) = (t3+t4)/(t3-t4)
 * clobbers t1 and t2, preserves t3 and t4 */
private static void mont_dbl(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 bx, Long10 bz)
{
    sqr(t1, t3);                /* t1 = t3^2 */
    sqr(t2, t4);                /* t2 = t4^2 */
    mul(bx, t1, t2);            /* bx = t3^2 * t4^2 */
    sub(t2, t1, t2);            /* t2 = t3^2 - t4^2 */
    mul_small(bz, t2, 121665);  /* bz = 121665 * t2 */
    add(t1, t1, bz);            /* t1 = t3^2 + 121665 * t2 */
    mul(bz, t1, t2);            /* bz = t1 * t2 */
}
/* Montgomery differential addition step.
 * A = P + Q where
 *   X(A) = ax/az
 *   X(P) = (t1+t2)/(t1-t2)
 *   X(Q) = (t3+t4)/(t3-t4)
 *   X(P-Q) = dx
 * clobbers t1 and t2, preserves t3 and t4 */
private static void mont_add(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 ax, Long10 az, Long10 dx)
{
    mul(ax, t2, t3);    /* ax = t2 * t3 */
    mul(az, t1, t4);    /* az = t1 * t4 */
    add(t1, ax, az);    /* t1 = ax + az */
    sub(t2, ax, az);    /* t2 = ax - az */
    sqr(ax, t1);        /* ax = (ax + az)^2 */
    sqr(t1, t2);        /* t1 = (ax - az)^2 */
    mul(az, t1, dx);    /* az = t1 * dx */
}
/* Montgomery differential addition step.
 * A = P + Q where
 *   X(A) = ax/az
 *   X(P) = (t1+t2)/(t1-t2)
 *   X(Q) = (t3+t4)/(t3-t4)
 *   X(P-Q) = dx
 * clobbers t1 and t2, preserves t3 and t4 */
private void MontyAdd(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 ax, Long10 az, Long10 dx)
{
    Multiply(ax, t2, t3);   /* ax = t2 * t3 */
    Multiply(az, t1, t4);   /* az = t1 * t4 */
    Add(t1, ax, az);        /* t1 = ax + az */
    Sub(t2, ax, az);        /* t2 = ax - az */
    Square(ax, t1);         /* ax = (ax + az)^2 */
    Square(t1, t2);         /* t1 = (ax - az)^2 */
    Multiply(az, t1, dx);   /* az = t1 * dx */
}
/// <summary>
/// Montgomery differential addition step.
/// A = P + Q where
///     X(A) = ax/az
///     X(P) = (t1+t2)/(t1-t2)
///     X(Q) = (t3+t4)/(t3-t4)
///     X(P-Q) = dx
/// clobbers t1 and t2, preserves t3 and t4
/// </summary>
private static void mont_add(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 ax, Long10 az, Long10 dx)
{
    Mul(ax, t2, t3);    // ax = t2 * t3
    Mul(az, t1, t4);    // az = t1 * t4
    Add(t1, ax, az);    // t1 = ax + az
    Sub(t2, ax, az);    // t2 = ax - az
    Sqr(ax, t1);        // ax = (ax + az)^2
    Sqr(t1, t2);        // t1 = (ax - az)^2
    Mul(az, t1, dx);    // az = t1 * dx
}
/// <summary>
/// Montgomery doubling step.
/// B = 2 * Q where
///     X(B) = bx/bz
///     X(Q) = (t3+t4)/(t3-t4)
/// clobbers t1 and t2, preserves t3 and t4
/// </summary>
private static void mont_dbl(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 bx, Long10 bz)
{
    Sqr(t1, t3);                // t1 = t3^2
    Sqr(t2, t4);                // t2 = t4^2
    Mul(bx, t1, t2);            // bx = t3^2 * t4^2
    Sub(t2, t1, t2);            // t2 = t3^2 - t4^2
    MulSmall(bz, t2, 121665);   // bz = 121665 * t2
    Add(t1, t1, bz);            // t1 = t3^2 + 121665 * t2
    Mul(bz, t1, t2);            // bz = t1 * t2
}
/* Montgomery doubling step.
 * B = 2 * Q where
 *   X(B) = bx/bz
 *   X(Q) = (t3+t4)/(t3-t4)
 * clobbers t1 and t2, preserves t3 and t4 */
private void MontyDouble(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 bx, Long10 bz)
{
    Square(t1, t3);             /* t1 = t3^2 */
    Square(t2, t4);             /* t2 = t4^2 */
    Multiply(bx, t1, t2);       /* bx = t3^2 * t4^2 */
    Sub(t2, t1, t2);            /* t2 = t3^2 - t4^2 */
    MulSmall(bz, t2, 121665);   /* bz = 121665 * t2 */
    Add(t1, t1, bz);            /* t1 = t3^2 + 121665 * t2 */
    Multiply(bz, t1, t2);       /* bz = t1 * t2 */
}
/// <remarks>
/// P = kG and s = sign(P)/k
/// Runs a bit-by-bit Montgomery ladder over the 32 bytes of K, most significant
/// bit first, and packs the resulting x-coordinate into Px.
/// </remarks>
/// <param name="Px">Output buffer for the packed result (written by Pack).</param>
/// <param name="S">Optional output for the signing value; skipped when null.
/// The computation assumes G is the standard base point.</param>
/// <param name="K">Scalar; bytes 0 .. 31 are read.</param>
/// <param name="Gx">Packed base point x-coordinate, or null to use the value 9.</param>
private static void Core(byte[] Px, byte[] S, byte[] K, byte[] Gx)
{
    Long10 dx = new Long10(), t1 = new Long10(), t2 = new Long10(), t3 = new Long10(), t4 = new Long10();
    Long10[] x = new Long10[] { new Long10(), new Long10() };
    Long10[] z = new Long10[] { new Long10(), new Long10() };

    // unpack the base
    if (Gx != null)
        Unpack(dx, Gx);
    else
        Set(dx, 9);

    // 0G = point-at-infinity
    Set(x[0], 1);
    Set(z[0], 0);
    // 1G = G
    Copy(x[1], dx);
    Set(z[1], 1);

    for (int i = 32; i-- != 0; )
    {
        for (int j = 8; j-- != 0; )
        {
            // swap arguments depending on bit
            int bit1 = (K[i] & 0xFF) >> j & 1;
            int bit0 = ~(K[i] & 0xFF) >> j & 1;
            Long10 ax = x[bit0];
            Long10 az = z[bit0];
            Long10 bx = x[bit1];
            Long10 bz = z[bit1];

            // a' = a + b
            // b' = 2 b
            MontPrep(t1, t2, ax, az);
            MontPrep(t3, t4, bx, bz);
            MontAdd(t1, t2, t3, t4, ax, az, dx);
            MontDbl(t1, t2, t3, t4, bx, bz);
        }
    }

    // Convert x[0]/z[0] to affine form and serialise it.
    Recip(t1, z[0], 0);
    Mul(dx, x[0], t1);
    Pack(dx, Px);

    // calculate s such that s abs(P) = G .. assumes G is std base point
    if (S != null)
    {
        XtoY2(t2, t1, dx);      // t1 = Py^2
        Recip(t3, z[1], 0);     // where Q=P+G ...
        Mul(t2, x[1], t3);      // t2 = Qx
        AddXY(t2, t2, dx);      // t2 = Qx + Px
        t2.N0 += 9 + 486662;    // t2 = Qx + Px + Gx + 486662
        dx.N0 -= 9;             // dx = Px - Gx
        Sqr(t3, dx);            // t3 = (Px - Gx)^2
        Mul(dx, t2, t3);        // dx = t2 (Px - Gx)^2
        Sub(dx, dx, t1);        // dx = t2 (Px - Gx)^2 - Py^2
        dx.N0 -= 39420360;      // dx = t2 (Px - Gx)^2 - Py^2 - Gy^2
        Mul(t1, dx, BASE_R2Y);  // t1 = -Py

        if (IsNegative(t1) != 0)    // sign is 1, so just copy
            Copy32(S, K);
        else                        // sign is -1, so negate
            MulaSmall(S, ORDER_TIMES_8, 0, K, 32, -1);

        // reduce s mod q (is this needed? do it just in case, it's fast anyway)
        // divmod((dstptr) t1, s, 32, order25519, 32);

        // take reciprocal of s mod q
        byte[] temp1 = new byte[32];
        byte[] temp2 = new byte[64];
        byte[] temp3 = new byte[64];
        Copy32(temp1, ORDER);
        Copy32(S, Egcd32(temp2, temp3, S, temp1));

        // If the top bit of the result is set, add ORDER once.
        if ((S[31] & 0x80) != 0)
            MulaSmall(S, S, 0, ORDER, 32, 1);
    }
}
/// <remarks>
/// Montgomery doubling step.
/// B = 2 * Q where:
///     X(B) = bx/bz
///     X(Q) = (t3+t4)/(t3-t4)
/// clobbers t1 and t2, preserves t3 and t4
/// </remarks>
private static void MontDbl(Long10 T1, Long10 T2, Long10 T3, Long10 T4, Long10 Bx, Long10 Bz)
{
    Sqr(T1, T3);                // T1 = T3^2
    Sqr(T2, T4);                // T2 = T4^2
    Mul(Bx, T1, T2);            // Bx = T3^2 * T4^2
    Sub(T2, T1, T2);            // T2 = T3^2 - T4^2
    MulSmall(Bz, T2, 121665);   // Bz = 121665 * T2
    AddXY(T1, T1, Bz);          // T1 = T3^2 + 121665 * T2
    Mul(Bz, T1, T2);            // Bz = T1 * T2
}
/// <remarks>
/// Form the sum and difference of a coordinate pair:
///     t1 = ax + az
///     t2 = ax - az
/// </remarks>
private static void MontPrep(Long10 T1, Long10 T2, Long10 Ax, Long10 Az)
{
    AddXY(T1, Ax, Az);
    Sub(T2, Ax, Az);
}
/// <remarks>
/// Checks if x is "negative", requires reduced input.
/// The result is the low bit of limb 0, flipped when the value overflows
/// (per IsOverflow) or its top limb is negative.
/// </remarks>
/// <returns>1 or 0.</returns>
private static int IsNegative(Long10 X)
{
    int flip = (IsOverflow(X) || (X.N9 < 0)) ? 1 : 0;
    long lowBit = X.N0 & 1;
    return (int)(flip ^ lowBit);
}
/// <remarks>
/// Square a number. Optimised form of multiplying X by itself: symmetric
/// cross products are computed once and doubled.
/// X is copied into locals before X2 is written, so X2 may be the same object as X.
/// </remarks>
/// <returns>X2, to allow call chaining.</returns>
private static Long10 Sqr(Long10 X2, Long10 X)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    long x0 = X.N0, x1 = X.N1, x2 = X.N2, x3 = X.N3, x4 = X.N4,
         x5 = X.N5, x6 = X.N6, x7 = X.N7, x8 = X.N8, x9 = X.N9;

    // Carry chain starts at limb 8 and runs 9, 0, 1 ... 7.
    long sum = x4 * x4
             + 2 * (x0 * x8 + x2 * x6)
             + 38 * (x9 * x9)
             + 4 * (x1 * x7 + x3 * x5);
    X2.N8 = sum & Mask26;

    sum = (sum >> 26)
        + 2 * (x0 * x9 + x1 * x8 + x2 * x7 + x3 * x6 + x4 * x5);
    X2.N9 = sum & Mask25;

    sum = 19 * (sum >> 25)
        + x0 * x0
        + 38 * (x2 * x8 + x4 * x6 + x5 * x5)
        + 76 * (x1 * x9 + x3 * x7);
    X2.N0 = sum & Mask26;

    sum = (sum >> 26)
        + 2 * (x0 * x1)
        + 38 * (x2 * x9 + x3 * x8 + x4 * x7 + x5 * x6);
    X2.N1 = sum & Mask25;

    sum = (sum >> 25)
        + 19 * (x6 * x6)
        + 2 * (x0 * x2 + x1 * x1)
        + 38 * (x4 * x8)
        + 76 * (x3 * x9 + x5 * x7);
    X2.N2 = sum & Mask26;

    sum = (sum >> 26)
        + 2 * (x0 * x3 + x1 * x2)
        + 38 * (x4 * x9 + x5 * x8 + x6 * x7);
    X2.N3 = sum & Mask25;

    sum = (sum >> 25)
        + x2 * x2
        + 2 * (x0 * x4)
        + 38 * (x6 * x8 + x7 * x7)
        + 4 * (x1 * x3)
        + 76 * (x5 * x9);
    X2.N4 = sum & Mask26;

    sum = (sum >> 26)
        + 2 * (x0 * x5 + x1 * x4 + x2 * x3)
        + 38 * (x6 * x9 + x7 * x8);
    X2.N5 = sum & Mask25;

    sum = (sum >> 25)
        + 19 * (x8 * x8)
        + 2 * (x0 * x6 + x2 * x4 + x3 * x3)
        + 4 * (x1 * x5)
        + 76 * (x7 * x9);
    X2.N6 = sum & Mask26;

    sum = (sum >> 26)
        + 2 * (x0 * x7 + x1 * x6 + x2 * x5 + x3 * x4)
        + 38 * (x8 * x9);
    X2.N7 = sum & Mask25;

    // Fold the last carry into the already-written limbs 8 and 9.
    sum = (sum >> 25) + X2.N8;
    X2.N8 = sum & Mask26;
    X2.N9 += sum >> 26;

    return X2;
}
/// <summary>
/// Signature verification primitive, calculates Y = vP + hG
/// </summary>
///
/// <param name="Y">Signature internal key (output, written by Pack)</param>
/// <param name="V">Signature value; bytes 0 .. 31 are read</param>
/// <param name="H">Signature hash; bytes 0 .. 31 are read</param>
/// <param name="P">Public key</param>
internal static void Verify(byte[] Y, byte[] V, byte[] H, byte[] P)
{
    // Y = v abs(P) + h G
    byte[] d = new byte[32];
    Long10[] p = new Long10[] { new Long10(), new Long10() },
        s = new Long10[] { new Long10(), new Long10() },
        yx = new Long10[] { new Long10(), new Long10(), new Long10() },
        yz = new Long10[] { new Long10(), new Long10(), new Long10() },
        t1 = new Long10[] { new Long10(), new Long10(), new Long10() },
        t2 = new Long10[] { new Long10(), new Long10(), new Long10() };
    int vi = 0, hi = 0, di = 0, nvh = 0, i, j, k;

    // set p[0] to G and p[1] to P
    Set(p[0], 9);
    Unpack(p[1], P);

    // set s[0] to X(P+G) and s[1] to X(P-G)
    XtoY2(t1[0], t2[0], p[1]);          // t2[0] = Py^2
    Sqrt(t1[0], t2[0]);                 // t1[0] = Py or -Py
    j = IsNegative(t1[0]);              // ... check which
    t2[0].N0 += 39420360;               // t2[0] = Py^2 + Gy^2
    Mul(t2[1], BASE_2Y, t1[0]);         // t2[1] = 2 Py Gy or -2 Py Gy
    Sub(t1[j], t2[0], t2[1]);           // t1[0] = Py^2 + Gy^2 - 2 Py Gy
    AddXY(t1[1 - j], t2[0], t2[1]);     // t1[1] = Py^2 + Gy^2 + 2 Py Gy
    Copy(t2[0], p[1]);                  // t2[0] = Px
    t2[0].N0 -= 9;                      // t2[0] = Px - Gx
    Sqr(t2[1], t2[0]);                  // t2[1] = (Px - Gx)^2
    Recip(t2[0], t2[1], 0);             // t2[0] = 1/(Px - Gx)^2
    Mul(s[0], t1[0], t2[0]);            // s[0] = t1[0]/(Px - Gx)^2
    Sub(s[0], s[0], p[1]);              // s[0] = t1[0]/(Px - Gx)^2 - Px
    s[0].N0 -= 9 + 486662;              // s[0] = X(P+G)
    Mul(s[1], t1[1], t2[0]);            // s[1] = t1[1]/(Px - Gx)^2
    Sub(s[1], s[1], p[1]);              // s[1] = t1[1]/(Px - Gx)^2 - Px
    s[1].N0 -= 9 + 486662;              // s[1] = X(P-G)
    MulSmall(s[0], s[0], 1);            // reduce s[0]
    MulSmall(s[1], s[1], 1);            // reduce s[1]

    // prepare the chain
    for (i = 0; i < 32; i++)
    {
        vi = (vi >> 8) ^ (V[i] & 0xFF) ^ ((V[i] & 0xFF) << 1);
        hi = (hi >> 8) ^ (H[i] & 0xFF) ^ ((H[i] & 0xFF) << 1);
        nvh = ~(vi ^ hi);
        di = (nvh & (di & 0x80) >> 7) ^ vi;
        di ^= nvh & (di & 0x01) << 1;
        di ^= nvh & (di & 0x02) << 1;
        di ^= nvh & (di & 0x04) << 1;
        di ^= nvh & (di & 0x08) << 1;
        di ^= nvh & (di & 0x10) << 1;
        di ^= nvh & (di & 0x20) << 1;
        di ^= nvh & (di & 0x40) << 1;
        // Only the low 8 bits are kept. di regularly exceeds 255 here, so the
        // truncation is made explicit; a plain cast would throw
        // OverflowException in an assembly compiled with checked arithmetic.
        d[i] = unchecked((byte)di);
    }

    di = ((nvh & (di & 0x80) << 1) ^ vi) >> 8;

    // initialize state
    Set(yx[0], 1);
    Copy(yx[1], p[di]);
    Copy(yx[2], s[0]);
    Set(yz[0], 0);
    Set(yz[1], 1);
    Set(yz[2], 1);

    // y[0] is (even)P + (even)G
    // y[1] is (even)P + (odd)G if current d-bit is 0
    // y[1] is (odd)P + (even)G if current d-bit is 1
    // y[2] is (odd)P + (odd)G
    vi = 0;
    hi = 0;

    // and go for it!
    for (i = 32; i-- != 0; )
    {
        vi = (vi << 8) | (V[i] & 0xFF);
        hi = (hi << 8) | (H[i] & 0xFF);
        di = (di << 8) | (d[i] & 0xFF);

        for (j = 8; j-- != 0; )
        {
            MontPrep(t1[0], t2[0], yx[0], yz[0]);
            MontPrep(t1[1], t2[1], yx[1], yz[1]);
            MontPrep(t1[2], t2[2], yx[2], yz[2]);

            k = ((vi ^ vi >> 1) >> j & 1) + ((hi ^ hi >> 1) >> j & 1);
            MontDbl(yx[2], yz[2], t1[k], t2[k], yx[0], yz[0]);

            k = (di >> j & 2) ^ ((di >> j & 1) << 1);
            MontAdd(t1[1], t2[1], t1[k], t2[k], yx[1], yz[1], p[di >> j & 1]);

            MontAdd(t1[2], t2[2], t1[0], t2[0], yx[2], yz[2], s[((vi ^ hi) >> j & 2) >> 1]);
        }
    }

    // Select the accumulator by the low bits of v and h, convert it to
    // affine form and serialise it.
    k = (vi & 1) + (hi & 1);
    Recip(t1[0], yz[k], 0);
    Mul(t1[1], yx[k], t1[0]);
    Pack(t1[1], Y);
}
/// <summary>
/// P = kG and s = sign(P)/k
/// Validates the buffers, runs a bit-by-bit Montgomery ladder over the 32 bytes
/// of privateKey (most significant bit first) and packs the resulting
/// x-coordinate into publicKey.
/// </summary>
/// <param name="publicKey">Output buffer, exactly 32 bytes.</param>
/// <param name="signingKey">Optional 32-byte output for the signing value; skipped
/// when null. The computation assumes G is the standard base point.</param>
/// <param name="privateKey">Scalar, exactly 32 bytes.</param>
/// <param name="peerPublicKey">Packed base point x-coordinate (32 bytes), or null
/// to use the value 9.</param>
/// <exception cref="ArgumentNullException">publicKey or privateKey is null.</exception>
/// <exception cref="ArgumentException">Any non-null buffer is not 32 bytes long.</exception>
static void Core(byte[] publicKey, byte[] signingKey, byte[] privateKey, byte[] peerPublicKey)
{
    if (publicKey == null)
    {
        throw new ArgumentNullException("publicKey");
    }
    if (publicKey.Length != 32)
    {
        throw new ArgumentException(
            String.Format("publicKey must be 32 bytes long (but was {0} bytes long)", publicKey.Length),
            "publicKey");
    }
    if (signingKey != null && signingKey.Length != 32)
    {
        throw new ArgumentException(
            String.Format("signingKey must be null or 32 bytes long (but was {0} bytes long)", signingKey.Length),
            "signingKey");
    }
    if (privateKey == null)
    {
        throw new ArgumentNullException("privateKey");
    }
    if (privateKey.Length != 32)
    {
        throw new ArgumentException(
            String.Format("privateKey must be 32 bytes long (but was {0} bytes long)", privateKey.Length),
            "privateKey");
    }
    if (peerPublicKey != null && peerPublicKey.Length != 32)
    {
        throw new ArgumentException(
            String.Format("peerPublicKey must be null or 32 bytes long (but was {0} bytes long)", peerPublicKey.Length),
            "peerPublicKey");
    }

    Long10 dx = new Long10();
    Long10 t1 = new Long10();
    Long10 t2 = new Long10();
    Long10 t3 = new Long10();
    Long10 t4 = new Long10();
    Long10[] x = { new Long10(), new Long10() };
    Long10[] z = { new Long10(), new Long10() };

    /* unpack the base */
    if (peerPublicKey != null)
    {
        Unpack(dx, peerPublicKey);
    }
    else
    {
        Set(dx, 9);
    }

    /* 0G = point-at-infinity */
    Set(x[0], 1);
    Set(z[0], 0);

    /* 1G = G */
    Copy(x[1], dx);
    Set(z[1], 1);

    /* walk the scalar from byte 31 down to byte 0, bit 7 down to bit 0 */
    for (int i = 31; i >= 0; i--)
    {
        for (int j = 7; j >= 0; j--)
        {
            /* swap arguments depending on bit */
            int bit1 = (privateKey[i] & 0xFF) >> j & 1;
            int bit0 = ~(privateKey[i] & 0xFF) >> j & 1;
            Long10 ax = x[bit0];
            Long10 az = z[bit0];
            Long10 bx = x[bit1];
            Long10 bz = z[bit1];

            /* a' = a + b */
            /* b' = 2 b */
            MontyPrepare(t1, t2, ax, az);
            MontyPrepare(t3, t4, bx, bz);
            MontyAdd(t1, t2, t3, t4, ax, az, dx);
            MontyDouble(t1, t2, t3, t4, bx, bz);
        }
    }

    /* convert x[0]/z[0] to affine form and serialise it */
    Reciprocal(t1, z[0], false);
    Multiply(dx, x[0], t1);
    Pack(dx, publicKey);

    /* calculate s such that s abs(P) = G .. assumes G is std base point */
    if (signingKey == null)
    {
        return;
    }

    CurveEquationInline(t1, dx, t2);    /* t1 = Py^2 */
    Reciprocal(t3, z[1], false);        /* where Q=P+G ... */
    Multiply(t2, x[1], t3);             /* t2 = Qx */
    Add(t2, t2, dx);                    /* t2 = Qx + Px */
    t2.N0 += 9 + 486662;                /* t2 = Qx + Px + Gx + 486662 */
    dx.N0 -= 9;                         /* dx = Px - Gx */
    Square(t3, dx);                     /* t3 = (Px - Gx)^2 */
    Multiply(dx, t2, t3);               /* dx = t2 (Px - Gx)^2 */
    Sub(dx, dx, t1);                    /* dx = t2 (Px - Gx)^2 - Py^2 */
    dx.N0 -= 39420360;                  /* dx = t2 (Px - Gx)^2 - Py^2 - Gy^2 */
    Multiply(t1, dx, BaseR2Y);          /* t1 = -Py */

    /* NOTE(review): the three Copy32 calls here only make sense if Copy32
     * takes (source, destination) - confirm against its definition. */
    if (IsNegative(t1) != 0)
    {
        /* sign is 1, so just copy */
        Copy32(privateKey, signingKey);
    }
    else
    {
        /* sign is -1, so negate */
        MultiplyArraySmall(signingKey, OrderTimes8, 0, privateKey, 32, -1);
    }

    /* reduce s mod q
     * (is this needed? do it just in case, it's fast anyway) */
    //divmod((dstptr) t1, s, 32, order25519, 32);

    /* take reciprocal of s mod q */
    var temp1 = new byte[32];
    var temp2 = new byte[64];
    var temp3 = new byte[64];
    Copy32(Order, temp1);
    Copy32(Egcd32(temp2, temp3, signingKey, temp1), signingKey);

    /* if the top bit of the result is set, add Order once */
    if ((signingKey[31] & 0x80) != 0)
    {
        MultiplyArraySmall(signingKey, signingKey, 0, Order, 32, 1);
    }
}
/// <summary>
/// Evaluate the curve equation Y^2 = X^3 + 486662 X^2 + X, computed as
/// (X^2 + 486662 X + 1) * X.
/// </summary>
/// <param name="y2">output</param>
/// <param name="x">X</param>
/// <param name="temp">temporary; overwritten</param>
static void CurveEquationInline(Long10 y2, Long10 x, Long10 temp)
{
    Square(temp, x);            // temp = x^2
    MulSmall(y2, x, 486662);    // y2   = 486662 x
    Add(temp, temp, y2);        // temp = x^2 + 486662 x
    temp.N0 += 1;               // temp = x^2 + 486662 x + 1
    Multiply(y2, temp, x);      // y2   = temp * x
}
/* Montgomery doubling step.
 * B = 2 * Q where
 *   X(B) = bx/bz
 *   X(Q) = (t3+t4)/(t3-t4)
 * clobbers t1 and t2, preserves t3 and t4 */
static void MontyDouble(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 bx, Long10 bz)
{
    Square(t1, t3);             /* t1 = t3^2 */
    Square(t2, t4);             /* t2 = t4^2 */
    Multiply(bx, t1, t2);       /* bx = t3^2 * t4^2 */
    Sub(t2, t1, t2);            /* t2 = t3^2 - t4^2 */
    MulSmall(bz, t2, 121665);   /* bz = 121665 * t2 */
    Add(t1, t1, bz);            /* t1 = t3^2 + 121665 * t2 */
    Multiply(bz, t1, t2);       /* bz = t1 * t2 */
}
/// <summary>
/// Square a number. Optimization of Multiply(xsqr, x, x): symmetric cross
/// products are computed once and doubled.
/// x is copied into locals before xsqr is written, so both may be the same object.
/// </summary>
static void Square(Long10 xsqr, Long10 x)
{
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    long x0 = x.N0, x1 = x.N1, x2 = x.N2, x3 = x.N3, x4 = x.N4;
    long x5 = x.N5, x6 = x.N6, x7 = x.N7, x8 = x.N8, x9 = x.N9;

    // Carry chain starts at limb 8 and runs 9, 0, 1 ... 7.
    long acc = x4 * x4
             + 2 * (x0 * x8 + x2 * x6)
             + 38 * (x9 * x9)
             + 4 * (x1 * x7 + x3 * x5);
    xsqr.N8 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x9 + x1 * x8 + x2 * x7 + x3 * x6 + x4 * x5);
    xsqr.N9 = acc & Mask25;

    acc = 19 * (acc >> 25)
        + x0 * x0
        + 38 * (x2 * x8 + x4 * x6 + x5 * x5)
        + 76 * (x1 * x9 + x3 * x7);
    xsqr.N0 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x1)
        + 38 * (x2 * x9 + x3 * x8 + x4 * x7 + x5 * x6);
    xsqr.N1 = acc & Mask25;

    acc = (acc >> 25)
        + 19 * (x6 * x6)
        + 2 * (x0 * x2 + x1 * x1)
        + 38 * (x4 * x8)
        + 76 * (x3 * x9 + x5 * x7);
    xsqr.N2 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x3 + x1 * x2)
        + 38 * (x4 * x9 + x5 * x8 + x6 * x7);
    xsqr.N3 = acc & Mask25;

    acc = (acc >> 25)
        + x2 * x2
        + 2 * (x0 * x4)
        + 38 * (x6 * x8 + x7 * x7)
        + 4 * (x1 * x3)
        + 76 * (x5 * x9);
    xsqr.N4 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x5 + x1 * x4 + x2 * x3)
        + 38 * (x6 * x9 + x7 * x8);
    xsqr.N5 = acc & Mask25;

    acc = (acc >> 25)
        + 19 * (x8 * x8)
        + 2 * (x0 * x6 + x2 * x4 + x3 * x3)
        + 4 * (x1 * x5)
        + 76 * (x7 * x9);
    xsqr.N6 = acc & Mask26;

    acc = (acc >> 26)
        + 2 * (x0 * x7 + x1 * x6 + x2 * x5 + x3 * x4)
        + 38 * (x8 * x9);
    xsqr.N7 = acc & Mask25;

    // Fold the last carry into the already-written limbs 8 and 9.
    acc = (acc >> 25) + xsqr.N8;
    xsqr.N8 = acc & Mask26;
    xsqr.N9 += acc >> 26;
}
/********************* Elliptic curve *********************/

/* y^2 = x^3 + 486662 x^2 + x over GF(2^255-19) */

/* Form the sum and difference of a coordinate pair:
 *   t1 = ax + az
 *   t2 = ax - az */
static void MontyPrepare(Long10 t1, Long10 t2, Long10 ax, Long10 az)
{
    Add(t1, ax, az);
    Sub(t2, ax, az);
}
/// <summary>
/// Checks if x is "negative", requires reduced input.
/// The result is the low bit of limb 0, flipped when the value overflows
/// (per IsOverflow) or its top limb is negative.
/// </summary>
/// <param name="x">must be reduced input</param>
/// <returns>1 or 0.</returns>
static int IsNegative(Long10 x)
{
    int flip = (IsOverflow(x) | (x.N9 < 0)) ? 1 : 0;
    long lowBit = x.N0 & 1;
    return (int)(flip ^ lowBit);
}
/// <remarks>
/// Multiplies X by Y and stores the product in XY.
/// The output is in reduced form, the inputs need not be.
/// All limbs of X and Y are copied into locals before XY is written.
/// </remarks>
/// <returns>XY, the same object that was passed in.</returns>
private static Long10 Mul(Long10 XY, Long10 X, Long10 Y)
{
    // Even-numbered limbs are masked to 26 bits, odd-numbered limbs to 25 bits.
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    // sahn0:
    // Working on local copies instead of the object's members
    // seems to improve performance a bit.
    long x0 = X.N0, x1 = X.N1, x2 = X.N2, x3 = X.N3, x4 = X.N4;
    long x5 = X.N5, x6 = X.N6, x7 = X.N7, x8 = X.N8, x9 = X.N9;
    long y0 = Y.N0, y1 = Y.N1, y2 = Y.N2, y3 = Y.N3, y4 = Y.N4;
    long y5 = Y.N5, y6 = Y.N6, y7 = Y.N7, y8 = Y.N8, y9 = Y.N9;

    // Start at limb 8 so the carry out of limb 9 can wrap (times 19) into limb 0.
    long acc = (x0 * y8) + (x2 * y6) + (x4 * y4) + (x6 * y2) + (x8 * y0)
        + 2 * ((x1 * y7) + (x3 * y5) + (x5 * y3) + (x7 * y1))
        + 38 * (x9 * y9);
    XY.N8 = acc & Mask26;

    acc = (acc >> 26)
        + (x0 * y9) + (x1 * y8) + (x2 * y7) + (x3 * y6) + (x4 * y5)
        + (x5 * y4) + (x6 * y3) + (x7 * y2) + (x8 * y1) + (x9 * y0);
    XY.N9 = acc & Mask25;

    acc = (x0 * y0)
        + 19 * ((acc >> 25) + (x2 * y8) + (x4 * y6) + (x6 * y4) + (x8 * y2))
        + 38 * ((x1 * y9) + (x3 * y7) + (x5 * y5) + (x7 * y3) + (x9 * y1));
    XY.N0 = acc & Mask26;

    acc = (acc >> 26)
        + (x0 * y1) + (x1 * y0)
        + 19 * ((x2 * y9) + (x3 * y8) + (x4 * y7) + (x5 * y6)
              + (x6 * y5) + (x7 * y4) + (x8 * y3) + (x9 * y2));
    XY.N1 = acc & Mask25;

    acc = (acc >> 25)
        + (x0 * y2) + (x2 * y0)
        + 19 * ((x4 * y8) + (x6 * y6) + (x8 * y4))
        + 2 * (x1 * y1)
        + 38 * ((x3 * y9) + (x5 * y7) + (x7 * y5) + (x9 * y3));
    XY.N2 = acc & Mask26;

    acc = (acc >> 26)
        + (x0 * y3) + (x1 * y2) + (x2 * y1) + (x3 * y0)
        + 19 * ((x4 * y9) + (x5 * y8) + (x6 * y7) + (x7 * y6) + (x8 * y5) + (x9 * y4));
    XY.N3 = acc & Mask25;

    acc = (acc >> 25)
        + (x0 * y4) + (x2 * y2) + (x4 * y0)
        + 19 * ((x6 * y8) + (x8 * y6))
        + 2 * ((x1 * y3) + (x3 * y1))
        + 38 * ((x5 * y9) + (x7 * y7) + (x9 * y5));
    XY.N4 = acc & Mask26;

    acc = (acc >> 26)
        + (x0 * y5) + (x1 * y4) + (x2 * y3) + (x3 * y2) + (x4 * y1) + (x5 * y0)
        + 19 * ((x6 * y9) + (x7 * y8) + (x8 * y7) + (x9 * y6));
    XY.N5 = acc & Mask25;

    acc = (acc >> 25)
        + (x0 * y6) + (x2 * y4) + (x4 * y2) + (x6 * y0)
        + 19 * (x8 * y8)
        + 2 * ((x1 * y5) + (x3 * y3) + (x5 * y1))
        + 38 * ((x7 * y9) + (x9 * y7));
    XY.N6 = acc & Mask26;

    acc = (acc >> 26)
        + (x0 * y7) + (x1 * y6) + (x2 * y5) + (x3 * y4)
        + (x4 * y3) + (x5 * y2) + (x6 * y1) + (x7 * y0)
        + 19 * ((x8 * y9) + (x9 * y8));
    XY.N7 = acc & Mask25;

    // Fold the carry out of limb 7 into the limb 8 stored earlier, then into limb 9.
    acc = (acc >> 25) + XY.N8;
    XY.N8 = acc & Mask26;
    XY.N9 += acc >> 26;

    return XY;
}
/// <remarks>
/// Convert to internal format from little-endian byte format.
/// Reads M[0]..M[31] and spreads those 256 bits over the ten limbs of X,
/// alternating 26-bit (N0, N2, N4, N6, N8) and 25-bit (N1, N3, N5, N7)
/// fields; N9 receives the remaining 26 bits.
/// </remarks>
/// <param name="X">Destination; every limb N0..N9 is overwritten.</param>
/// <param name="M">Source bytes; indices 0 through 31 are accessed.</param>
private static void Unpack(Long10 X, byte[] M)
{
    // A C# byte widens to a non-negative int, so a plain right shift already
    // discards the low bits that were consumed by the previous limb.
    X.N0 = M[0] | (M[1] << 8) | (M[2] << 16) | ((M[3] & 3) << 24);
    X.N1 = (M[3] >> 2) | (M[4] << 6) | (M[5] << 14) | ((M[6] & 7) << 22);
    X.N2 = (M[6] >> 3) | (M[7] << 5) | (M[8] << 13) | ((M[9] & 31) << 21);
    X.N3 = (M[9] >> 5) | (M[10] << 3) | (M[11] << 11) | ((M[12] & 63) << 19);
    X.N4 = (M[12] >> 6) | (M[13] << 2) | (M[14] << 10) | (M[15] << 18);
    X.N5 = M[16] | (M[17] << 8) | (M[18] << 16) | ((M[19] & 1) << 24);
    X.N6 = (M[19] >> 1) | (M[20] << 7) | (M[21] << 15) | ((M[22] & 7) << 23);
    X.N7 = (M[22] >> 3) | (M[23] << 5) | (M[24] << 13) | ((M[25] & 15) << 21);
    X.N8 = (M[25] >> 4) | (M[26] << 4) | (M[27] << 12) | ((M[28] & 63) << 20);
    X.N9 = (M[28] >> 6) | (M[29] << 2) | (M[30] << 10) | (M[31] << 18);
}
/// <remarks>
/// Calculates a reciprocal. The output is in reduced form, the inputs need not be.
/// Simply calculates y = x^(p-2), so it is not too fast.
/// When SqrtAssist is non-zero it calculates y = x^((p-5)/8) instead.
/// The trailing comments track the exponent of X held by the destination.
/// </remarks>
/// <param name="Y">Receives the result.</param>
/// <param name="X">Value to raise to the fixed power.</param>
/// <param name="SqrtAssist">Zero for the reciprocal, non-zero for the square-root helper power.</param>
private static void Recip(Long10 Y, Long10 X, int SqrtAssist)
{
    Long10 t0 = new Long10();
    Long10 t1 = new Long10();
    Long10 t2 = new Long10();
    Long10 t3 = new Long10();
    Long10 t4 = new Long10();

    // the chain for x^(2^255-21) is straight from djb's implementation
    Sqr(t1, X);         // 2 == 2 * 1
    Sqr(t2, t1);        // 4 == 2 * 2
    Sqr(t0, t2);        // 8 == 2 * 4
    Mul(t2, t0, X);     // 9 == 8 + 1
    Mul(t0, t2, t1);    // 11 == 9 + 2
    Sqr(t1, t0);        // 22 == 2 * 11
    Mul(t3, t1, t2);    // 31 == 22 + 9

    Sqr(t1, t3);        // 2^6 - 2^1
    Sqr(t2, t1);        // 2^7 - 2^2
    Sqr(t1, t2);        // 2^8 - 2^3
    Sqr(t2, t1);        // 2^9 - 2^4
    Sqr(t1, t2);        // 2^10 - 2^5
    Mul(t2, t1, t3);    // 2^10 - 2^0

    Sqr(t1, t2);        // 2^11 - 2^1
    Sqr(t3, t1);        // 2^12 - 2^2
    for (int i = 1; i < 5; i++)
    {
        Sqr(t1, t3);
        Sqr(t3, t1);
    }                   // t3: 2^20 - 2^10
    Mul(t1, t3, t2);    // 2^20 - 2^0

    Sqr(t3, t1);        // 2^21 - 2^1
    Sqr(t4, t3);        // 2^22 - 2^2
    for (int i = 1; i < 10; i++)
    {
        Sqr(t3, t4);
        Sqr(t4, t3);
    }                   // t4: 2^40 - 2^20
    Mul(t3, t4, t1);    // 2^40 - 2^0

    for (int i = 0; i < 5; i++)
    {
        Sqr(t1, t3);
        Sqr(t3, t1);
    }                   // t3: 2^50 - 2^10
    Mul(t1, t3, t2);    // 2^50 - 2^0

    Sqr(t2, t1);        // 2^51 - 2^1
    Sqr(t3, t2);        // 2^52 - 2^2
    for (int i = 1; i < 25; i++)
    {
        Sqr(t2, t3);
        Sqr(t3, t2);
    }                   // t3: 2^100 - 2^50
    Mul(t2, t3, t1);    // 2^100 - 2^0

    Sqr(t3, t2);        // 2^101 - 2^1
    Sqr(t4, t3);        // 2^102 - 2^2
    for (int i = 1; i < 50; i++)
    {
        Sqr(t3, t4);
        Sqr(t4, t3);
    }                   // t4: 2^200 - 2^100
    Mul(t3, t4, t2);    // 2^200 - 2^0

    for (int i = 0; i < 25; i++)
    {
        Sqr(t4, t3);
        Sqr(t3, t4);
    }                   // t3: 2^250 - 2^50
    Mul(t2, t3, t1);    // 2^250 - 2^0

    Sqr(t1, t2);        // 2^251 - 2^1
    Sqr(t2, t1);        // 2^252 - 2^2

    if (SqrtAssist != 0)
    {
        Mul(Y, X, t2);  // 2^252 - 3
        return;
    }

    Sqr(t1, t2);        // 2^253 - 2^3
    Sqr(t2, t1);        // 2^254 - 2^4
    Sqr(t1, t2);        // 2^255 - 2^5
    Mul(Y, t1, t0);     // 2^255 - 21
}
/// <remarks>
/// Check if reduced-form input >= 2^255-19.
/// NOTE(review): P25 and P26 are declared elsewhere; presumably the all-ones
/// 25-bit and 26-bit limb values. Confirm against their declarations.
/// </remarks>
/// <param name="X">Value to test; expected to be in reduced form.</param>
/// <returns>
/// true when the top limb exceeds P25, or when N0 is above P26 - 19 while
/// every other limb is saturated.
/// </returns>
private static bool IsOverflow(Long10 X)
{
    if (X.N9 > P25)
    {
        return true;
    }

    if (X.N0 <= P26 - 19)
    {
        return false;
    }

    // AND-ing the limbs together equals the all-ones value only when each one does.
    bool oddLimbsSaturated = (X.N1 & X.N3 & X.N5 & X.N7 & X.N9) == P25;
    if (!oddLimbsSaturated)
    {
        return false;
    }

    return (X.N2 & X.N4 & X.N6 & X.N8) == P26;
}
/// <remarks>
/// A square root: with v = (2u)^((p-5)/8), stores x = u * v * (2 * u * v^2 - 1).
/// </remarks>
/// <param name="X">Receives the result (also used as scratch for v^2).</param>
/// <param name="U">Value whose root is wanted.</param>
private static void Sqrt(Long10 X, Long10 U)
{
    Long10 v = new Long10();
    Long10 tmpA = new Long10();
    Long10 tmpB = new Long10();

    AddXY(tmpA, U, U);      // tmpA = 2u
    Recip(v, tmpA, 1);      // v = (2u)^((p-5)/8)
    Sqr(X, v);              // x = v^2
    Mul(tmpB, tmpA, X);     // tmpB = 2uv^2
    tmpB.N0--;              // tmpB = 2uv^2-1
    Mul(tmpA, v, tmpB);     // tmpA = v(2uv^2-1)
    Mul(X, U, tmpA);        // x = uv(2uv^2-1)
}
/// <summary>
/// Multiplies x by y and stores the product in xy.
/// The output is in reduced form, the inputs need not be.
/// All limbs of x and y are copied into locals before xy is written.
/// </summary>
/// <param name="xy">Receives the ten result limbs N0..N9.</param>
/// <param name="x">First factor.</param>
/// <param name="y">Second factor.</param>
static void Multiply(Long10 xy, Long10 x, Long10 y)
{
    // Even-numbered limbs are masked to 26 bits, odd-numbered limbs to 25 bits.
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    /* sahn0:
     * Working on local copies instead of the object's members
     * seems to improve performance a bit.
     */
    long x0 = x.N0, x1 = x.N1, x2 = x.N2, x3 = x.N3, x4 = x.N4;
    long x5 = x.N5, x6 = x.N6, x7 = x.N7, x8 = x.N8, x9 = x.N9;
    long y0 = y.N0, y1 = y.N1, y2 = y.N2, y3 = y.N3, y4 = y.N4;
    long y5 = y.N5, y6 = y.N6, y7 = y.N7, y8 = y.N8, y9 = y.N9;

    /* Start at limb 8 so the carry out of limb 9 can wrap (times 19) into limb 0. */
    long sum = (x0 * y8) + (x2 * y6) + (x4 * y4) + (x6 * y2) + (x8 * y0)
        + 2 * ((x1 * y7) + (x3 * y5) + (x5 * y3) + (x7 * y1))
        + 38 * (x9 * y9);
    xy.N8 = sum & Mask26;

    sum = (sum >> 26)
        + (x0 * y9) + (x1 * y8) + (x2 * y7) + (x3 * y6) + (x4 * y5)
        + (x5 * y4) + (x6 * y3) + (x7 * y2) + (x8 * y1) + (x9 * y0);
    xy.N9 = sum & Mask25;

    sum = (x0 * y0)
        + 19 * ((sum >> 25) + (x2 * y8) + (x4 * y6) + (x6 * y4) + (x8 * y2))
        + 38 * ((x1 * y9) + (x3 * y7) + (x5 * y5) + (x7 * y3) + (x9 * y1));
    xy.N0 = sum & Mask26;

    sum = (sum >> 26)
        + (x0 * y1) + (x1 * y0)
        + 19 * ((x2 * y9) + (x3 * y8) + (x4 * y7) + (x5 * y6)
              + (x6 * y5) + (x7 * y4) + (x8 * y3) + (x9 * y2));
    xy.N1 = sum & Mask25;

    sum = (sum >> 25)
        + (x0 * y2) + (x2 * y0)
        + 19 * ((x4 * y8) + (x6 * y6) + (x8 * y4))
        + 2 * (x1 * y1)
        + 38 * ((x3 * y9) + (x5 * y7) + (x7 * y5) + (x9 * y3));
    xy.N2 = sum & Mask26;

    sum = (sum >> 26)
        + (x0 * y3) + (x1 * y2) + (x2 * y1) + (x3 * y0)
        + 19 * ((x4 * y9) + (x5 * y8) + (x6 * y7) + (x7 * y6) + (x8 * y5) + (x9 * y4));
    xy.N3 = sum & Mask25;

    sum = (sum >> 25)
        + (x0 * y4) + (x2 * y2) + (x4 * y0)
        + 19 * ((x6 * y8) + (x8 * y6))
        + 2 * ((x1 * y3) + (x3 * y1))
        + 38 * ((x5 * y9) + (x7 * y7) + (x9 * y5));
    xy.N4 = sum & Mask26;

    sum = (sum >> 26)
        + (x0 * y5) + (x1 * y4) + (x2 * y3) + (x3 * y2) + (x4 * y1) + (x5 * y0)
        + 19 * ((x6 * y9) + (x7 * y8) + (x8 * y7) + (x9 * y6));
    xy.N5 = sum & Mask25;

    sum = (sum >> 25)
        + (x0 * y6) + (x2 * y4) + (x4 * y2) + (x6 * y0)
        + 19 * (x8 * y8)
        + 2 * ((x1 * y5) + (x3 * y3) + (x5 * y1))
        + 38 * ((x7 * y9) + (x9 * y7));
    xy.N6 = sum & Mask26;

    sum = (sum >> 26)
        + (x0 * y7) + (x1 * y6) + (x2 * y5) + (x3 * y4)
        + (x4 * y3) + (x5 * y2) + (x6 * y1) + (x7 * y0)
        + 19 * ((x8 * y9) + (x9 * y8));
    xy.N7 = sum & Mask25;

    /* Fold the carry out of limb 7 into the limb 8 stored earlier, then into limb 9. */
    sum = (sum >> 25) + xy.N8;
    xy.N8 = sum & Mask26;
    xy.N9 += sum >> 26;
}
/// <remarks>
/// Differential addition on x-coordinates: A = P + Q where
///   X(A)   = ax/az
///   X(P)   = (t1+t2)/(t1-t2)
///   X(Q)   = (t3+t4)/(t3-t4)
///   X(P-Q) = dx
/// Clobbers t1 and t2, preserves t3 and t4.
/// </remarks>
/// <param name="T1">Input for P; overwritten as scratch.</param>
/// <param name="T2">Input for P; overwritten as scratch.</param>
/// <param name="T3">Input for Q; only read.</param>
/// <param name="T4">Input for Q; only read.</param>
/// <param name="Ax">Receives the numerator of X(A).</param>
/// <param name="Az">Receives the denominator of X(A).</param>
/// <param name="Dx">X(P-Q); only read.</param>
private static void MontAdd(Long10 T1, Long10 T2, Long10 T3, Long10 T4, Long10 Ax, Long10 Az, Long10 Dx)
{
    // Cross products; Ax and Az are only scratch at this point.
    Mul(Ax, T2, T3);    // Ax = T2 * T3
    Mul(Az, T1, T4);    // Az = T1 * T4

    // Sum and difference of the cross products.
    AddXY(T1, Ax, Az);  // T1 = Ax + Az
    Sub(T2, Ax, Az);    // T2 = Ax - Az

    // Final projective coordinates of A.
    Sqr(Ax, T1);        // Ax = T1^2
    Sqr(T1, T2);        // T1 = T2^2
    Mul(Az, T1, Dx);    // Az = T1 * Dx
}
/// <summary>
/// Multiply a number by a small integer in range -185861411 .. 185861411.
/// The output is in reduced form, the input x need not be. x and xy may point
/// to the same buffer: each limb of x is read before that limb of xy is written.
/// </summary>
/// <param name="xy">Receives the ten result limbs N0..N9.</param>
/// <param name="x">Number to scale.</param>
/// <param name="y">Small multiplier.</param>
static void MulSmall(Long10 xy, Long10 x, long y)
{
    // Even-numbered limbs are masked to 26 bits, odd-numbered limbs to 25 bits.
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    // Start at limb 8 so the carry out of limb 9 can wrap (times 19) into limb 0.
    long carry = x.N8 * y;
    xy.N8 = carry & Mask26;

    carry = (carry >> 26) + (x.N9 * y);
    xy.N9 = carry & Mask25;

    carry = 19 * (carry >> 25) + (x.N0 * y);
    xy.N0 = carry & Mask26;

    carry = (carry >> 26) + (x.N1 * y);
    xy.N1 = carry & Mask25;

    carry = (carry >> 25) + (x.N2 * y);
    xy.N2 = carry & Mask26;

    carry = (carry >> 26) + (x.N3 * y);
    xy.N3 = carry & Mask25;

    carry = (carry >> 25) + (x.N4 * y);
    xy.N4 = carry & Mask26;

    carry = (carry >> 26) + (x.N5 * y);
    xy.N5 = carry & Mask25;

    carry = (carry >> 25) + (x.N6 * y);
    xy.N6 = carry & Mask26;

    carry = (carry >> 26) + (x.N7 * y);
    xy.N7 = carry & Mask25;

    // Fold the carry out of limb 7 into the limb 8 stored earlier, then into limb 9.
    carry = (carry >> 25) + xy.N8;
    xy.N8 = carry & Mask26;
    xy.N9 += carry >> 26;
}
/// <summary>
/// Limb-wise subtraction: xy.Ni = x.Ni - y.Ni for i = 0..9.
/// No carry or borrow is propagated between limbs, so the result is not
/// brought back into reduced form here.
/// </summary>
/// <param name="xy">Receives the ten differences.</param>
/// <param name="x">Minuend.</param>
/// <param name="y">Subtrahend.</param>
static void Sub(Long10 xy, Long10 x, Long10 y)
{
    // Each limb depends only on the matching limbs of x and y, so all the
    // differences can be formed first and stored afterwards.
    var d0 = x.N0 - y.N0;
    var d1 = x.N1 - y.N1;
    var d2 = x.N2 - y.N2;
    var d3 = x.N3 - y.N3;
    var d4 = x.N4 - y.N4;
    var d5 = x.N5 - y.N5;
    var d6 = x.N6 - y.N6;
    var d7 = x.N7 - y.N7;
    var d8 = x.N8 - y.N8;
    var d9 = x.N9 - y.N9;

    xy.N0 = d0;
    xy.N1 = d1;
    xy.N2 = d2;
    xy.N3 = d3;
    xy.N4 = d4;
    xy.N5 = d5;
    xy.N6 = d6;
    xy.N7 = d7;
    xy.N8 = d8;
    xy.N9 = d9;
}
/// <remarks>
/// Multiply a number by a small integer in range -185861411 .. 185861411.
/// The output is in reduced form, the input X need not be. X and XY may point
/// to the same buffer: each limb of X is read before that limb of XY is written.
/// </remarks>
/// <returns>XY, the same object that was passed in.</returns>
private static Long10 MulSmall(Long10 XY, Long10 X, long Y)
{
    // Even-numbered limbs are masked to 26 bits, odd-numbered limbs to 25 bits.
    const long Mask26 = (1 << 26) - 1;
    const long Mask25 = (1 << 25) - 1;

    // Start at limb 8 so the carry out of limb 9 can wrap (times 19) into limb 0.
    long acc = X.N8 * Y;
    XY.N8 = acc & Mask26;

    acc = (acc >> 26) + (X.N9 * Y);
    XY.N9 = acc & Mask25;

    acc = 19 * (acc >> 25) + (X.N0 * Y);
    XY.N0 = acc & Mask26;

    acc = (acc >> 26) + (X.N1 * Y);
    XY.N1 = acc & Mask25;

    acc = (acc >> 25) + (X.N2 * Y);
    XY.N2 = acc & Mask26;

    acc = (acc >> 26) + (X.N3 * Y);
    XY.N3 = acc & Mask25;

    acc = (acc >> 25) + (X.N4 * Y);
    XY.N4 = acc & Mask26;

    acc = (acc >> 26) + (X.N5 * Y);
    XY.N5 = acc & Mask25;

    acc = (acc >> 25) + (X.N6 * Y);
    XY.N6 = acc & Mask26;

    acc = (acc >> 26) + (X.N7 * Y);
    XY.N7 = acc & Mask25;

    // Fold the carry out of limb 7 into the limb 8 stored earlier, then into limb 9.
    acc = (acc >> 25) + XY.N8;
    XY.N8 = acc & Mask26;
    XY.N9 += acc >> 26;

    return XY;
}
/// <summary>
/// Calculates a reciprocal. The output is in reduced form, the inputs need not
/// be. Simply calculates y = x^(p-2), so it is not too fast.
/// When sqrtAssist is true, it instead calculates y = x^((p-5)/8).
/// The trailing comments track the exponent of x held by the destination.
/// </summary>
/// <param name="y">Receives the result.</param>
/// <param name="x">Value to raise to the fixed power.</param>
/// <param name="sqrtAssist">false for the reciprocal, true for the square-root helper power.</param>
static void Reciprocal(Long10 y, Long10 x, bool sqrtAssist)
{
    Long10 t0 = new Long10();
    Long10 t1 = new Long10();
    Long10 t2 = new Long10();
    Long10 t3 = new Long10();
    Long10 t4 = new Long10();

    /* the chain for x^(2^255-21) is straight from djb's implementation */
    Square(t1, x);          /* 2 == 2 * 1 */
    Square(t2, t1);         /* 4 == 2 * 2 */
    Square(t0, t2);         /* 8 == 2 * 4 */
    Multiply(t2, t0, x);    /* 9 == 8 + 1 */
    Multiply(t0, t2, t1);   /* 11 == 9 + 2 */
    Square(t1, t0);         /* 22 == 2 * 11 */
    Multiply(t3, t1, t2);   /* 31 == 22 + 9 == 2^5 - 2^0 */

    Square(t1, t3);         /* 2^6 - 2^1 */
    Square(t2, t1);         /* 2^7 - 2^2 */
    Square(t1, t2);         /* 2^8 - 2^3 */
    Square(t2, t1);         /* 2^9 - 2^4 */
    Square(t1, t2);         /* 2^10 - 2^5 */
    Multiply(t2, t1, t3);   /* 2^10 - 2^0 */

    Square(t1, t2);         /* 2^11 - 2^1 */
    Square(t3, t1);         /* 2^12 - 2^2 */
    for (int i = 1; i < 5; i++)
    {
        Square(t1, t3);
        Square(t3, t1);
    }                       /* t3: 2^20 - 2^10 */
    Multiply(t1, t3, t2);   /* 2^20 - 2^0 */

    Square(t3, t1);         /* 2^21 - 2^1 */
    Square(t4, t3);         /* 2^22 - 2^2 */
    for (int i = 1; i < 10; i++)
    {
        Square(t3, t4);
        Square(t4, t3);
    }                       /* t4: 2^40 - 2^20 */
    Multiply(t3, t4, t1);   /* 2^40 - 2^0 */

    for (int i = 0; i < 5; i++)
    {
        Square(t1, t3);
        Square(t3, t1);
    }                       /* t3: 2^50 - 2^10 */
    Multiply(t1, t3, t2);   /* 2^50 - 2^0 */

    Square(t2, t1);         /* 2^51 - 2^1 */
    Square(t3, t2);         /* 2^52 - 2^2 */
    for (int i = 1; i < 25; i++)
    {
        Square(t2, t3);
        Square(t3, t2);
    }                       /* t3: 2^100 - 2^50 */
    Multiply(t2, t3, t1);   /* 2^100 - 2^0 */

    Square(t3, t2);         /* 2^101 - 2^1 */
    Square(t4, t3);         /* 2^102 - 2^2 */
    for (int i = 1; i < 50; i++)
    {
        Square(t3, t4);
        Square(t4, t3);
    }                       /* t4: 2^200 - 2^100 */
    Multiply(t3, t4, t2);   /* 2^200 - 2^0 */

    for (int i = 0; i < 25; i++)
    {
        Square(t4, t3);
        Square(t3, t4);
    }                       /* t3: 2^250 - 2^50 */
    Multiply(t2, t3, t1);   /* 2^250 - 2^0 */

    Square(t1, t2);         /* 2^251 - 2^1 */
    Square(t2, t1);         /* 2^252 - 2^2 */

    if (sqrtAssist)
    {
        Multiply(y, x, t2); /* 2^252 - 3 */
        return;
    }

    Square(t1, t2);         /* 2^253 - 2^3 */
    Square(t2, t1);         /* 2^254 - 2^4 */
    Square(t1, t2);         /* 2^255 - 2^5 */
    Multiply(y, t1, t0);    /* 2^255 - 21 */
}
/*
 * Differential addition on x-coordinates: A = P + Q where
 *   X(A)   = ax/az
 *   X(P)   = (t1+t2)/(t1-t2)
 *   X(Q)   = (t3+t4)/(t3-t4)
 *   X(P-Q) = dx
 * clobbers t1 and t2, preserves t3 and t4
 *
 * ax and az are pure outputs: both are overwritten before they are read.
 * dx is only read, by the final multiplication.
 */
static void MontyAdd(Long10 t1, Long10 t2, Long10 t3, Long10 t4, Long10 ax, Long10 az, Long10 dx)
{
    Multiply(ax, t2, t3); /* ax = t2 * t3 (scratch use of ax) */
    Multiply(az, t1, t4); /* az = t1 * t4 (scratch use of az) */
    Add(t1, ax, az);      /* t1 = ax + az */
    Sub(t2, ax, az);      /* t2 = ax - az */
    Square(ax, t1);       /* ax = t1^2, numerator of X(A) */
    Square(t1, t2);       /* t1 = t2^2 */
    Multiply(az, t1, dx); /* az = t1 * dx, denominator of X(A) */
}