/// <summary>
        /// Montgomery ladder step: from Q, Q' and Q - Q' computes 2Q and Q + Q'
        /// (x and z coordinates only).
        /// </summary>
        /// <param name="x2">Receives the x coordinate of 2Q; the array is written in place.</param>
        /// <param name="z2">Receives the z coordinate of 2Q; the array is written in place.</param>
        /// <param name="x3">
        /// Receives the x coordinate of Q + Q'. The reference is replaced with a newly
        /// allocated 19-element array; the array passed in by the caller is not written.
        /// </param>
        /// <param name="z3">
        /// Receives the z coordinate of Q + Q'. The reference is replaced with a newly
        /// allocated 19-element array; the array passed in by the caller is not written.
        /// </param>
        /// <param name="x">x coordinate of Q; overwritten during the computation.</param>
        /// <param name="z">z coordinate of Q; overwritten during the computation.</param>
        /// <param name="xprime">x coordinate of Q'; overwritten during the computation.</param>
        /// <param name="zprime">z coordinate of Q'; overwritten during the computation.</param>
        /// <param name="qmqp">Q - Q'; only used as a multiplication input.</param>
        /// <remarks>
        /// Input: Q, Q', Q-Q'
        /// Output: 2Q, Q+Q'
        ///   x2 z2: long form
        ///   x3 z3: long form
        ///   x z: short form, destroyed
        ///   xprime zprime: short form, destroyed
        ///   qmqp: short form, preserved
        ///
        /// On entry and exit, the absolute value of the limbs of all inputs and outputs
        /// is &lt; 2^26.
        ///
        /// The helper calls update their arguments in place, so the order of the
        /// statements below matters and must not be changed.
        /// </remarks>
        public static void fmonty(Limb[] x2, Limb[] z2,
                                  ref Limb[] x3, ref Limb[] z3,
                                  Limb[] x, Limb[] z,
                                  Limb[] xprime, Limb[] zprime,
                                  Limb[] qmqp)
        {
            // x is updated in place by fsum, so snapshot it for the subtraction that follows.
            Limb[] origx = (Limb[])x.Clone();
            fsum(x, z);
            /* |x[i]| < 2^27 */
            fdifference(z, origx);  /* does x - z */
            /* |z[i]| < 2^27 */

            // Same sum/difference pair for Q'.
            Limb[] origxprime = (Limb[])xprime.Clone();
            fsum(xprime, zprime);
            /* |xprime[i]| < 2^27 */
            fdifference(zprime, origxprime);
            /* |zprime[i]| < 2^27 */

            Limb[] xxprime = new Limb[19];
            fproduct(xxprime, xprime, z);
            /* |xxprime[i]| < 14*2^54: the largest product of two limbs will be <
             * 2^(27+27) and fproduct adds together, at most, 14 of those products.
             * (Approximating that to 2^58 doesn't work out.) */
            Limb[] zzprime = new Limb[19];
            fproduct(zzprime, x, zprime);
            /* |zzprime[i]| < 14*2^54 */
            freduce_degree(xxprime);
            freduce_coefficients(xxprime);
            /* |xxprime[i]| < 2^26 */
            freduce_degree(zzprime);
            freduce_coefficients(zzprime);
            /* |zzprime[i]| < 2^26 */

            // Snapshot xxprime before fsum overwrites it; the difference needs the old value.
            Limb[] origxxprime = (Limb[])xxprime.Clone();
            fsum(xxprime, zzprime);
            /* |xxprime[i]| < 2^27 */
            fdifference(zzprime, origxxprime);
            /* |zzprime[i]| < 2^27 */

            Limb[] xxxprime = new Limb[19];
            fsquare(xxxprime, xxprime);
            /* |xxxprime[i]| < 2^26 */
            Limb[] zzzprime = new Limb[19];
            fsquare(zzzprime, zzprime);
            /* |zzzprime[i]| < 2^26 */
            fproduct(zzprime, zzzprime, qmqp);
            /* |zzprime[i]| < 14*2^52 */
            freduce_degree(zzprime);
            freduce_coefficients(zzprime);
            /* |zzprime[i]| < 2^26 */

            // Q + Q' is complete. xxxprime and zzprime are locals that are not touched
            // again below, so they are handed to the caller directly; cloning them
            // first would only add two allocations per call.
            x3 = xxxprime;
            z3 = zzprime;

            // Doubling: 2Q from the sum/difference of Q computed at the top.
            Limb[] xx = new Limb[19];
            fsquare(xx, x);
            /* |xx[i]| < 2^26 */
            Limb[] zz = new Limb[19];
            fsquare(zz, z);
            /* |zz[i]| < 2^26 */
            fproduct(x2, xx, zz);
            /* |x2[i]| < 14*2^52 */
            freduce_degree(x2);
            freduce_coefficients(x2);
            /* |x2[i]| < 2^26 */
            fdifference(zz, xx);  // does zz = xx - zz
            /* |zz[i]| < 2^27 */

            // NOTE(review): 121665 is presumably the curve constant (A - 2) / 4 with
            // A = 486662 (Curve25519) - confirm against the curve parameters.
            Limb[] zzz = new Limb[19];
            fscalar_product(zzz, zz, 121665);
            /* |zzz[i]| < 2^(27+17) */

            /* No need to call freduce_degree here:
             * fscalar_product doesn't increase the degree of its input. */
            freduce_coefficients(zzz);
            /* |zzz[i]| < 2^26 */
            fsum(zzz, xx);
            /* |zzz[i]| < 2^27 */
            fproduct(z2, zz, zzz);
            /* |z2[i]| < 14*2^(26+27) */
            freduce_degree(z2);
            freduce_coefficients(z2);
            /* |z2[i]| < 2^26 */
        }