private static void Advance(NBody *bodies, Delta *r, double *mag, double dt) { unchecked { //NBody* i = null, j = null; //Delta* k = null; //for (i = ptrSun, k = r; i < ptrEnd; ++i) { // NBody iBody = *i; // for (j = i + 1; j <= ptrEnd; ++j, ++k) { // k->dx = iBody.x - j->x; // k->dy = iBody.y - j->y; // k->dz = iBody.z - j->z; // } //} //var arr = ToArray(r, 1000); //var a2 = ToArray(mag, 1000); for (int i = 0, k = 0; i < SIZE - 1; ++i) { NBody iBody = bodies[i]; for (int j = i + 1; j < SIZE; ++j, ++k) { r[k].dx = iBody.x - bodies[j].x; r[k].dy = iBody.y - bodies[j].y; r[k].dz = iBody.z - bodies[j].z; } } for (int i = 0; i < N; i += 2) { Vector128 <double> dx = default, dy = default, dz = default;
/// <summary>
/// Computes the total energy of the bodies in the inclusive range
/// [<paramref name="ptrSun"/>, <paramref name="ptrEnd"/>]: the kinetic term
/// 0.5 * m * |v|^2 of every body minus m_i * m_j / distance for every unordered pair.
/// </summary>
/// <param name="ptrSun">Pointer to the first body.</param>
/// <param name="ptrEnd">Pointer to the last body (inclusive).</param>
/// <returns>The accumulated energy.</returns>
private static double Energy(NBody *ptrSun, NBody *ptrEnd) {
    unchecked {
        double total = 0.0;
        NBody *outer = ptrSun;
        while (outer <= ptrEnd) {
            // Cache the outer body's fields once per outer iteration.
            double m = outer->mass;
            double px = outer->x, py = outer->y, pz = outer->z;
            double sx = outer->vx, sy = outer->vy, sz = outer->vz;

            total += 0.5 * m * (sx * sx + sy * sy + sz * sz);

            NBody *inner = outer + 1;
            while (inner <= ptrEnd) {
                double otherMass = inner->mass;
                double ex = px - inner->x;
                double ey = py - inner->y;
                double ez = pz - inner->z;
                total -= m * otherMass / Math.Sqrt(ex * ex + ey * ey + ez * ez);
                ++inner;
            }
            ++outer;
        }
        return total;
    }
}
/// <summary>
/// Sums the energy of all bodies from <paramref name="bi"/> up to and including
/// <paramref name="last"/>: kinetic energy of each body minus the pairwise
/// mass product divided by the distance between the two bodies.
/// </summary>
/// <param name="bi">Pointer to the first body.</param>
/// <param name="last">Pointer to the last body (inclusive).</param>
/// <returns>The accumulated energy.</returns>
private static double Energy(NBody *bi, NBody *last) {
    unchecked {
        double energy = 0.0;
        for (NBody *cur = bi; cur <= last; cur++) {
            double cx = cur->x, cy = cur->y, cz = cur->z;
            double ux = cur->vx, uy = cur->vy, uz = cur->vz;
            double mass = cur->mass;

            double speedSquared = ux * ux + uy * uy + uz * uz;
            energy += 0.5 * mass * speedSquared;

            for (NBody *other = cur + 1; other <= last; other++) {
                double rx = cx - other->x;
                double ry = cy - other->y;
                double rz = cz - other->z;
                double distance = Math.Sqrt(rx * rx + ry * ry + rz * rz);
                energy -= mass * other->mass / distance;
            }
        }
        return energy;
    }
}
/// <summary>
/// Computes the total energy of the bodies in the inclusive range
/// [<paramref name="bodies"/>, <paramref name="last"/>] and writes it to standard
/// output formatted as "F9".
/// </summary>
/// <remarks>
/// The previous goto-based form executed the pair body before testing its bound, so
/// for the final body it dereferenced <c>last + 1</c> and subtracted a term built from
/// memory outside the array. The loops below test the bound first, so only real pairs
/// contribute.
/// </remarks>
/// <param name="bodies">Pointer to the first body.</param>
/// <param name="last">Pointer to the last body (inclusive).</param>
private unsafe static void Energy(NBody *bodies, NBody *last) {
    unchecked {
        double e = 0.0;
        for (NBody *bi = bodies; bi <= last; ++bi) {
            double ix = bi->x, iy = bi->y, iz = bi->z,
                   ivx = bi->vx, ivy = bi->vy, ivz = bi->vz,
                   imass = bi->mass;

            // Kinetic energy of body i.
            e += 0.5 * imass * (ivx * ivx + ivy * ivy + ivz * ivz);

            // Pair terms with every later body; empty when bi is the last body.
            for (NBody *bj = bi + 1; bj <= last; ++bj) {
                double dx = ix - bj->x, dy = iy - bj->y, dz = iz - bj->z;
                e -= imass * bj->mass / Math.Sqrt(dx * dx + dy * dy + dz * dz);
            }
        }
        Console.Out.WriteLine(e.ToString("F9"));
    }
}
/// <summary>
/// Computes the total energy of the bodies in [<paramref name="ptrSun"/>,
/// <paramref name="ptrEnd"/>] (kinetic terms minus pairwise mass-product / distance
/// terms) and prints it to standard output with nine decimals ("F9").
/// </summary>
/// <param name="ptrSun">Pointer to the first body.</param>
/// <param name="ptrEnd">Pointer to the last body (inclusive).</param>
private static void Energy(NBody *ptrSun, NBody *ptrEnd) {
    unchecked {
        double sum = 0.0;
        NBody *a = ptrSun;
        while (a <= ptrEnd) {
            double massA = a->mass;
            double ax = a->x, ay = a->y, az = a->z;
            double avx = a->vx, avy = a->vy, avz = a->vz;

            sum += 0.5 * massA * (avx * avx + avy * avy + avz * avz);

            NBody *b = a + 1;
            while (b <= ptrEnd) {
                double massB = b->mass;
                double sepX = ax - b->x;
                double sepY = ay - b->y;
                double sepZ = az - b->z;
                sum -= massA * massB / Math.Sqrt(sepX * sepX + sepY * sepY + sepZ * sepZ);
                ++b;
            }
            ++a;
        }
        string text = sum.ToString("F9");
        Console.Out.WriteLine(text);
    }
}
/// <summary>
/// Entry point: initialises five bodies on the stack, prints the energy, advances the
/// system by the requested number of steps (first argument, default 50000000) with a
/// step of 0.01, prints the energy again and finally the elapsed wall-clock time.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    var stopWatch = Stopwatch.StartNew();
    unchecked {
        // The original author found no measurable difference between stackalloc and a
        // fixed pointer here; stackalloc simply needs fewer lines.
        NBody *ptrSun = stackalloc NBody[5];
        NBody *ptrEnd = ptrSun + 4;
        InitBodies(ptrSun, ptrEnd);

        double before = Energy(ptrSun, ptrEnd);
        Console.Out.WriteLine(before.ToString("F9"));

        int steps;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        } else {
            steps = 50000000;
        }

        for (int remaining = steps; remaining > 0; --remaining) {
            Advance(ptrSun, ptrEnd, 0.01d);
        }

        double after = Energy(ptrSun, ptrEnd);
        Console.Out.WriteLine(after.ToString("F9"));
    }
    stopWatch.Stop();
    Console.WriteLine($"NBody C# Baseline Elapsed Time: {stopWatch.Elapsed}");
}
/// <summary>
/// Advances every body in [<paramref name="ptrSun"/>, <paramref name="ptrEnd"/>] by one
/// step: updates the velocities of each pair from their separation, then moves each
/// body along its velocity.
/// </summary>
/// <remarks>
/// The previous version copied each body into a local (<c>NBody bi = *i</c>,
/// <c>NBody bj = *j</c>) and modified the copies, so none of the velocity or position
/// updates reached the array (only the last body's position changed). This version
/// reads and writes through the pointers.
/// </remarks>
/// <param name="ptrSun">Pointer to the first body.</param>
/// <param name="ptrEnd">Pointer to the last body (inclusive).</param>
/// <param name="distance">Step size applied to velocity and position updates.</param>
private static void Advance(NBody *ptrSun, NBody *ptrEnd, double distance) {
    unchecked {
        for (NBody *i = ptrSun; i < ptrEnd; ++i) {
            // Keep body i's state in locals for the inner loop; written back below.
            double ix = i->x, iy = i->y, iz = i->z,
                   ivx = i->vx, ivy = i->vy, ivz = i->vz,
                   imass = i->mass;

            for (NBody *j = i + 1; j <= ptrEnd; ++j) {
                double dx = j->x - ix, dy = j->y - iy, dz = j->z - iz,
                       jmass = j->mass,
                       mag = distance / GetMagnitude(dx, dy, dz);

                j->vx = j->vx - dx * imass * mag;
                j->vy = j->vy - dy * imass * mag;
                j->vz = j->vz - dz * imass * mag;

                ivx = ivx + dx * jmass * mag;
                ivy = ivy + dy * jmass * mag;
                ivz = ivz + dz * jmass * mag;
            }

            // Body i has now interacted with every other body: store and move it.
            i->vx = ivx;
            i->vy = ivy;
            i->vz = ivz;
            i->x = ix + ivx * distance;
            i->y = iy + ivy * distance;
            i->z = iz + ivz * distance;
        }

        // The last body is never the outer-loop body, so move it separately.
        ptrEnd->x = ptrEnd->x + ptrEnd->vx * distance;
        ptrEnd->y = ptrEnd->y + ptrEnd->vy * distance;
        ptrEnd->z = ptrEnd->z + ptrEnd->vz * distance;
    }
}
/// <summary>
/// Prints (format "F9") the total energy of the bodies from <paramref name="bi"/>
/// through <paramref name="last"/>: each body's kinetic energy minus, for every pair,
/// the product of the two masses divided by their distance.
/// </summary>
/// <param name="bi">Pointer to the first body.</param>
/// <param name="last">Pointer to the last body (inclusive).</param>
private unsafe static void Energy(NBody *bi, NBody *last) {
    unchecked {
        double acc = 0.0;
        NBody *first = bi;
        while (first <= last) {
            double fx = first->x, fy = first->y, fz = first->z;
            double fvx = first->vx, fvy = first->vy, fvz = first->vz;
            double fmass = first->mass;

            acc += 0.5 * fmass * (fvx * fvx + fvy * fvy + fvz * fvz);

            NBody *second = first + 1;
            while (second <= last) {
                double gx = fx - second->x;
                double gy = fy - second->y;
                double gz = fz - second->z;
                double separation = Math.Sqrt(gx * gx + gy * gy + gz * gz);
                acc -= second->mass * fmass / separation;
                ++second;
            }
            ++first;
        }
        Console.Out.WriteLine(acc.ToString("F9"));
    }
}
/// <summary>
/// Entry point: builds the five bodies (sun, jupiter, saturn, uranus, neptune) on the
/// stack, lets <c>InitBodies</c> finish the setup, prints the energy, advances the
/// system (step count from the first argument, default 1000, step 0.01) and prints the
/// energy again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        NBody *bodies = stackalloc NBody[5];

        // Sun: only the mass is given; every other field takes its default value.
        bodies[0] = new NBody {
            mass = Solarmass
        };
        // Jupiter
        bodies[1] = new NBody {
            x = 4.84143144246472090e+00,
            y = -1.16032004402742839e+00,
            z = -1.03622044471123109e-01,
            vx = 1.66007664274403694e-03 * DaysPeryear,
            vy = 7.69901118419740425e-03 * DaysPeryear,
            vz = -6.90460016972063023e-05 * DaysPeryear,
            mass = 9.54791938424326609e-04 * Solarmass
        };
        // Saturn
        bodies[2] = new NBody {
            x = 8.34336671824457987e+00,
            y = 4.12479856412430479e+00,
            z = -4.03523417114321381e-01,
            vx = -2.76742510726862411e-03 * DaysPeryear,
            vy = 4.99852801234917238e-03 * DaysPeryear,
            vz = 2.30417297573763929e-05 * DaysPeryear,
            mass = 2.85885980666130812e-04 * Solarmass
        };
        // Uranus
        bodies[3] = new NBody {
            x = 1.28943695621391310e+01,
            y = -1.51111514016986312e+01,
            z = -2.23307578892655734e-01,
            vx = 2.96460137564761618e-03 * DaysPeryear,
            vy = 2.37847173959480950e-03 * DaysPeryear,
            vz = -2.96589568540237556e-05 * DaysPeryear,
            mass = 4.36624404335156298e-05 * Solarmass
        };
        // Neptune
        bodies[4] = new NBody {
            x = 1.53796971148509165e+01,
            y = -2.59193146099879641e+01,
            z = 1.79258772950371181e-01,
            vx = 2.68067772490389322e-03 * DaysPeryear,
            vy = 1.62824170038242295e-03 * DaysPeryear,
            vz = -9.51592254519715870e-05 * DaysPeryear,
            mass = 5.15138902046611451e-05 * Solarmass
        };

        InitBodies(bodies);

        double initialEnergy = Energy(bodies);
        Console.Out.WriteLine(initialEnergy.ToString("F9"));

        int steps = 1000;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        }
        for (int done = 0; done < steps; ++done) {
            Advance(bodies, 0.01d);
        }

        double finalEnergy = Energy(bodies);
        Console.Out.WriteLine(finalEnergy.ToString("F9"));
    }
}
/// <summary>
/// Fills the body array: writes jupiter, saturn, uranus and neptune into slots 1..4,
/// then initialises the sun in slot 0 with mass <c>Solarmass</c> and a velocity equal to
/// the summed momentum of the bodies in (<paramref name="ptrSun"/>,
/// <paramref name="ptrEnd"/>] divided by <c>-Solarmass</c>.
/// </summary>
/// <remarks>
/// The sun is now assigned as a whole struct, so its position (and any other field) is
/// explicitly zero. Previously only mass and velocity were written and the position
/// relied on the caller's memory already being zeroed, which the language does not
/// guarantee for stackalloc'ed buffers.
/// </remarks>
/// <param name="ptrSun">Pointer to slot 0 of a buffer with room for at least five bodies.</param>
/// <param name="ptrEnd">Pointer to the last body included in the momentum sum.</param>
private static void InitBodies(NBody *ptrSun, NBody *ptrEnd) {
    const double Pi = 3.141592653589793;
    const double Solarmass = 4 * Pi * Pi;
    const double DaysPeryear = 365.24;
    unchecked {
        ptrSun[1] = new NBody { //jupiter
            mass = 9.54791938424326609e-04 * Solarmass,
            x = 4.84143144246472090e+00,
            y = -1.16032004402742839e+00,
            z = -1.03622044471123109e-01,
            vx = 1.66007664274403694e-03 * DaysPeryear,
            vy = 7.69901118419740425e-03 * DaysPeryear,
            vz = -6.90460016972063023e-05 * DaysPeryear
        };
        ptrSun[2] = new NBody { //saturn
            mass = 2.85885980666130812e-04 * Solarmass,
            x = 8.34336671824457987e+00,
            y = 4.12479856412430479e+00,
            z = -4.03523417114321381e-01,
            vx = -2.76742510726862411e-03 * DaysPeryear,
            vy = 4.99852801234917238e-03 * DaysPeryear,
            vz = 2.30417297573763929e-05 * DaysPeryear
        };
        ptrSun[3] = new NBody { //uranus
            mass = 4.36624404335156298e-05 * Solarmass,
            x = 1.28943695621391310e+01,
            y = -1.51111514016986312e+01,
            z = -2.23307578892655734e-01,
            vx = 2.96460137564761618e-03 * DaysPeryear,
            vy = 2.37847173959480950e-03 * DaysPeryear,
            vz = -2.96589568540237556e-05 * DaysPeryear
        };
        ptrSun[4] = new NBody { //neptune
            mass = 5.15138902046611451e-05 * Solarmass,
            x = 1.53796971148509165e+01,
            y = -2.59193146099879641e+01,
            z = 1.79258772950371181e-01,
            vx = 2.68067772490389322e-03 * DaysPeryear,
            vy = 1.62824170038242295e-03 * DaysPeryear,
            vz = -9.51592254519715870e-05 * DaysPeryear
        };

        // Sum the momentum (velocity * mass) of every body after the sun.
        double px = 0, py = 0, pz = 0;
        for (NBody *planet = ptrSun + 1; planet <= ptrEnd; ++planet) {
            double mass = planet->mass;
            px += planet->vx * mass;
            py += planet->vy * mass;
            pz += planet->vz * mass;
        }

        // Whole-struct assignment: fields not listed (x, y, z) become zero.
        *ptrSun = new NBody {
            mass = Solarmass,
            vx = px / -Solarmass,
            vy = py / -Solarmass,
            vz = pz / -Solarmass
        };
    }
}
/// <summary>
/// Entry point: allocates five bodies on the stack, initialises them, reports the
/// energy, runs the simulation for the requested number of steps (first argument,
/// default 1000) and reports the energy again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public unsafe static void Main(string[] args) {
    unchecked {
        const int bodyCount = 5;
        NBody *first = stackalloc NBody[bodyCount];
        NBody *final = first + (bodyCount - 1);

        InitBodies(first, final);
        Energy(first, final);

        // Parsed only after the first energy report, matching the original order.
        int steps;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        } else {
            steps = 1000;
        }
        Run(steps, first, final);

        Energy(first, final);
    }
}
/// <summary>
/// Advances the bodies in [<paramref name="bodies"/>, <paramref name="last"/>]
/// <paramref name="advancements"/> times. Each step updates the velocities of every
/// pair using <c>GetMagnitude</c> and then moves every body by its velocity times
/// <c>DT</c>.
/// </summary>
/// <remarks>
/// The earlier goto-based loop tested its conditions only after running a body, so it
/// performed one full step even when <paramref name="advancements"/> was zero or
/// negative, and dereferenced <c>bodies + 1</c> even when the range held a single body.
/// The structured loops below test first; for a positive count and two or more bodies
/// the sequence of operations is unchanged.
/// </remarks>
/// <param name="advancements">Number of steps to perform; non-positive values do nothing.</param>
/// <param name="bodies">Pointer to the first body.</param>
/// <param name="last">Pointer to the last body (inclusive).</param>
private unsafe static void Run(int advancements, NBody *bodies, NBody *last) {
    unchecked {
        for (; advancements > 0; --advancements) {
            for (NBody *bi = bodies; bi < last; ++bi) {
                double ix = bi->x, iy = bi->y, iz = bi->z,
                       ivx = bi->vx, ivy = bi->vy, ivz = bi->vz,
                       imass = bi->mass;

                for (NBody *bj = bi + 1; bj <= last; ++bj) {
                    double dx = bj->x - ix, dy = bj->y - iy, dz = bj->z - iz,
                           jmass = bj->mass,
                           mag = GetMagnitude(dx, dy, dz);

                    bj->vx = bj->vx - dx * imass * mag;
                    bj->vy = bj->vy - dy * imass * mag;
                    bj->vz = bj->vz - dz * imass * mag;

                    ivx = ivx + dx * jmass * mag;
                    ivy = ivy + dy * jmass * mag;
                    ivz = ivz + dz * jmass * mag;
                }

                bi->x = ix + ivx * DT;
                bi->y = iy + ivy * DT;
                bi->z = iz + ivz * DT;
                bi->vx = ivx;
                bi->vy = ivy;
                bi->vz = ivz;
            }

            // The last body is never the outer-loop body, so move it separately.
            last->x = last->x + last->vx * DT;
            last->y = last->y + last->vy * DT;
            last->z = last->z + last->vz * DT;
        }
    }
}
/// <summary>
/// Allocates five bodies on the stack, initialises them, reports the energy, calls
/// <c>Advance</c> <paramref name="advancements"/> times and reports the energy again.
/// </summary>
/// <param name="advancements">Number of steps; non-positive values perform none.</param>
private unsafe static void Run(int advancements) {
    unchecked {
        NBody *first = stackalloc NBody[5];
        NBody *final = first + 4;

        InitBodies(first, final);
        Energy(first, final);

        for (int remaining = advancements; remaining > 0; --remaining) {
            Advance(first, final);
        }

        Energy(first, final);
    }
}
/// <summary>
/// Sets the velocity of the first body to the summed momentum (velocity * mass) of the
/// bodies in slots 1..<c>LAST</c>, divided by <c>-Solarmass</c>.
/// </summary>
/// <param name="bodies">Pointer to slot 0 of the body array.</param>
private static void InitBodies(NBody *bodies) {
    unchecked {
        double sumX = 0, sumY = 0, sumZ = 0;

        NBody *stop = bodies + LAST;
        NBody *cursor = bodies + 1;
        while (cursor <= stop) {
            sumX += cursor->vx * cursor->mass;
            sumY += cursor->vy * cursor->mass;
            sumZ += cursor->vz * cursor->mass;
            ++cursor;
        }

        bodies->vx = sumX / -Solarmass;
        bodies->vy = sumY / -Solarmass;
        bodies->vz = sumZ / -Solarmass;
    }
}
/// <summary>
/// Advances the bodies in [<paramref name="ptrSun"/>, <paramref name="ptrEnd"/>] by one
/// step of size <paramref name="dt"/>: pairwise velocity updates followed by a position
/// update for every body.
/// </summary>
/// <param name="ptrSun">Pointer to the first body.</param>
/// <param name="ptrEnd">Pointer to the last body (inclusive).</param>
/// <param name="mags">Not read or written by this method.</param>
/// <param name="dt">Step size.</param>
private static void Advance(NBody *ptrSun, NBody *ptrEnd, double *mags, double dt) {
    unchecked {
        NBody *self = ptrSun;
        while (self < ptrEnd) {
            // Per the original author's note: loading these fields into locals up
            // front (instead of dereferencing each time) was worth roughly 7%.
            double px = self->x, py = self->y, pz = self->z;
            double ux = self->vx, uy = self->vy, uz = self->vz;
            double selfMass = self->mass;

            NBody *other = self + 1;
            while (other <= ptrEnd) {
                double rx = other->x - px;
                double ry = other->y - py;
                double rz = other->z - pz;
                double ovx = other->vx, ovy = other->vy, ovz = other->vz;
                double otherMass = other->mass;
                double scale = dt / GetMagnitude(rx, ry, rz);

                other->vx = ovx - rx * selfMass * scale;
                other->vy = ovy - ry * selfMass * scale;
                other->vz = ovz - rz * selfMass * scale;

                ux += rx * otherMass * scale;
                uy += ry * otherMass * scale;
                uz += rz * otherMass * scale;

                ++other;
            }

            self->x = px + ux * dt;
            self->y = py + uy * dt;
            self->z = pz + uz * dt;
            self->vx = ux;
            self->vy = uy;
            self->vz = uz;

            ++self;
        }

        // The final body is only ever the "other" body above; move it here.
        ptrEnd->x += ptrEnd->vx * dt;
        ptrEnd->y += ptrEnd->vy * dt;
        ptrEnd->z += ptrEnd->vz * dt;
    }
}
/// <summary>
/// Entry point: initialises five stack-allocated bodies, reports the energy, advances
/// the system (step count from the first argument, default 1000) and reports the energy
/// again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        NBody *first = stackalloc NBody[5];
        NBody *final = first + 4;

        InitBodies(first, final);
        Energy(first, final);

        int steps = 1000;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        }

        for (int done = 0; done < steps; ++done) {
            Advance(first, final);
        }

        Energy(first, final);
    }
}
/// <summary>
/// Entry point: initialises five stack-allocated bodies, prints the energy ("F9"),
/// advances the system with a step of 0.01 for the requested number of steps (first
/// argument, default 1000) and prints the energy again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        NBody *head = stackalloc NBody[5];
        NBody *tail = head + 4;
        InitBodies(head, tail);

        string startEnergy = Energy(head, tail).ToString("F9");
        Console.Out.WriteLine(startEnergy);

        int steps;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        } else {
            steps = 1000;
        }

        for (int remaining = steps; remaining > 0; --remaining) {
            Advance(head, tail, 0.01d);
        }

        string endEnergy = Energy(head, tail).ToString("F9");
        Console.Out.WriteLine(endEnergy);
    }
}
/// <summary>
/// Entry point: initialises five stack-allocated bodies, prints the energy ("F9"),
/// advances the system with a step of 0.01 (step count from the first argument,
/// default 1000) and prints the energy again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        // Author's note: stackalloc and a fixed pointer performed the same here;
        // stackalloc was kept because it takes fewer lines.
        NBody *ptrSun = stackalloc NBody[5];
        NBody *ptrEnd = ptrSun + 4;
        InitBodies(ptrSun, ptrEnd);

        double energyBefore = Energy(ptrSun, ptrEnd);
        Console.Out.WriteLine(energyBefore.ToString("F9"));

        int steps = 1000;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        }

        int done = 0;
        while (done < steps) {
            Advance(ptrSun, ptrEnd, 0.01d);
            ++done;
        }

        double energyAfter = Energy(ptrSun, ptrEnd);
        Console.Out.WriteLine(energyAfter.ToString("F9"));
    }
}
/// <summary>
/// Advances the bodies from <paramref name="bi"/> through <paramref name="last"/> by
/// one step: pairwise velocity updates scaled by <c>GetMagnitude</c>, then position
/// updates using <c>DT</c>.
/// </summary>
/// <remarks>
/// Both loops test their condition after the body has run (do/while), exactly like the
/// label/goto form this replaces, so the range is expected to hold at least two bodies.
/// </remarks>
/// <param name="bi">Pointer to the first body.</param>
/// <param name="last">Pointer to the last body (inclusive).</param>
private static void Advance(NBody *bi, NBody *last) {
    unchecked {
        NBody *self = bi;
        do {
            double px = self->x, py = self->y, pz = self->z;
            double ux = self->vx, uy = self->vy, uz = self->vz;
            double selfMass = self->mass;

            NBody *other = self + 1;
            do {
                double rx = other->x - px;
                double ry = other->y - py;
                double rz = other->z - pz;
                double otherMass = other->mass;
                double scale = GetMagnitude(rx, ry, rz);

                other->vx -= rx * selfMass * scale;
                other->vy -= ry * selfMass * scale;
                other->vz -= rz * selfMass * scale;

                ux += rx * otherMass * scale;
                uy += ry * otherMass * scale;
                uz += rz * otherMass * scale;
            } while (++other <= last);

            self->x = px + ux * DT;
            self->y = py + uy * DT;
            self->z = pz + uz * DT;
            self->vx = ux;
            self->vy = uy;
            self->vz = uz;
        } while (++self < last);

        // 'self' now points at the body the outer loop stopped on; move it as well.
        self->x += self->vx * DT;
        self->y += self->vy * DT;
        self->z += self->vz * DT;
    }
}
private static void Advance(NBody *bodies, Delta *r, double dt) { unchecked { double *mag = stackalloc double[1000]; for (int i = 0, k = 0; i < SIZE - 1; ++i) { NBody iBody = bodies[i]; for (int j = i + 1; j < SIZE; ++j, ++k) { r[k].dx = iBody.x - bodies[j].x; r[k].dy = iBody.y - bodies[j].y; r[k].dz = iBody.z - bodies[j].z; } } for (int i = 0; i < N; i += 2) { Vector128 <double> dx = default, dy = default, dz = default;
/// <summary>
/// Entry point: initialises <c>SIZE</c> stack-allocated bodies, prints the energy
/// ("F9"), allocates a scratch buffer of <c>N</c> doubles, advances the system with a
/// step of 0.01 (step count from the first argument, default 1000) and prints the
/// energy again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        NBody *ptrSun = stackalloc NBody[SIZE];
        NBody *ptrEnd = ptrSun + SIZE - 1;
        InitBodies(ptrSun, ptrEnd);

        string startEnergy = Energy(ptrSun, ptrEnd).ToString("F9");
        Console.Out.WriteLine(startEnergy);

        // Scratch buffer handed to every Advance call.
        double *mags = stackalloc double[N];

        int steps;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        } else {
            steps = 1000;
        }

        for (int done = 0; done < steps; ++done) {
            Advance(ptrSun, ptrEnd, mags, 0.01d);
        }

        string endEnergy = Energy(ptrSun, ptrEnd).ToString("F9");
        Console.Out.WriteLine(endEnergy);
    }
}
/// <summary>
/// Entry point: allocates <c>SIZE</c> bodies and <c>N</c> delta slots on the stack,
/// initialises the bodies, prints the energy ("F9"), advances the system with a step of
/// 0.01 (step count from the first argument, default 1000) and prints the energy again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        NBody *ptrSun = stackalloc NBody[SIZE];
        Delta *r = stackalloc Delta[N];
        InitBodies(ptrSun);

        double energyBefore = Energy(ptrSun);
        Console.Out.WriteLine(energyBefore.ToString("F9"));

        int steps = 1000;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        }

        for (int remaining = steps; remaining > 0; --remaining) {
            Advance(ptrSun, r, 0.01d);
        }

        double energyAfter = Energy(ptrSun);
        Console.Out.WriteLine(energyAfter.ToString("F9"));
    }
}
/// <summary>
/// Entry point: initialises five stack-allocated bodies, reports the energy, advances
/// the system with a step of 0.01 (step count from the first argument, default 1000)
/// and reports the energy again.
/// </summary>
/// <remarks>
/// The unused local that read <c>Vector&lt;double&gt;.Count</c> was removed; its value
/// was never consulted.
/// </remarks>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        // Author's note: stackalloc and a fixed pointer performed the same here;
        // stackalloc was kept because it takes fewer lines.
        NBody *ptrSun = stackalloc NBody[5];
        NBody *ptrEnd = ptrSun + 4;
        InitBodies(ptrSun, ptrEnd);
        Energy(ptrSun, ptrEnd);

        int advancements = args.Length > 0 ? Int32.Parse(args[0]) : 1000;
        while (advancements-- > 0) {
            Advance(ptrSun, ptrEnd, 0.01d);
        }

        Energy(ptrSun, ptrEnd);
    }
}
/// <summary>
/// Advances the bodies in [<paramref name="ptrSun"/>, <paramref name="ptrEnd"/>] by one
/// step of size <paramref name="distance"/>: pairwise velocity updates followed by a
/// position update for every body.
/// </summary>
/// <remarks>
/// The inner loop used to build a <c>Vector&lt;double&gt;</c> from a freshly allocated
/// three-element array and never use it. That allocated on the heap for every pair and
/// throws on hardware where <c>Vector&lt;double&gt;.Count</c> is 4, because the array
/// constructor requires at least <c>Count</c> elements. The dead vector is removed; the
/// arithmetic is unchanged.
/// </remarks>
/// <param name="ptrSun">Pointer to the first body.</param>
/// <param name="ptrEnd">Pointer to the last body (inclusive).</param>
/// <param name="distance">Step size applied to velocity and position updates.</param>
private static void Advance(NBody *ptrSun, NBody *ptrEnd, double distance) {
    unchecked {
        for (NBody *bi = ptrSun; bi < ptrEnd; ++bi) {
            double ix = bi->x, iy = bi->y, iz = bi->z,
                   ivx = bi->vx, ivy = bi->vy, ivz = bi->vz,
                   imass = bi->mass;

            for (NBody *bj = bi + 1; bj <= ptrEnd; ++bj) {
                double dx = bj->x - ix, dy = bj->y - iy, dz = bj->z - iz,
                       jmass = bj->mass,
                       mag = distance / GetMagnitude(dx, dy, dz);

                bj->vx = bj->vx - dx * imass * mag;
                bj->vy = bj->vy - dy * imass * mag;
                bj->vz = bj->vz - dz * imass * mag;

                ivx = ivx + dx * jmass * mag;
                ivy = ivy + dy * jmass * mag;
                ivz = ivz + dz * jmass * mag;
            }

            bi->vx = ivx;
            bi->vy = ivy;
            bi->vz = ivz;
            bi->x = ix + ivx * distance;
            bi->y = iy + ivy * distance;
            bi->z = iz + ivz * distance;
        }

        // The last body is never the outer-loop body, so move it separately.
        ptrEnd->x = ptrEnd->x + ptrEnd->vx * distance;
        ptrEnd->y = ptrEnd->y + ptrEnd->vy * distance;
        ptrEnd->z = ptrEnd->z + ptrEnd->vz * distance;
    }
}
/// <summary>
/// Advances the bodies from <paramref name="bi"/> through <paramref name="last"/> by
/// one step: pairwise velocity updates scaled by <c>GetMagnitude</c>, then position
/// updates using <c>DT</c>.
/// </summary>
/// <param name="bi">Pointer to the first body.</param>
/// <param name="last">Pointer to the last body (inclusive).</param>
private unsafe static void Advance(NBody *bi, NBody *last) {
    unchecked {
        NBody *self = bi;
        while (self < last) {
            double px = self->x, py = self->y, pz = self->z;
            double ux = self->vx, uy = self->vy, uz = self->vz;
            double selfMass = self->mass;

            NBody *other = self + 1;
            while (other <= last) {
                double rx = other->x - px;
                double ry = other->y - py;
                double rz = other->z - pz;
                double otherMass = other->mass;
                double scale = GetMagnitude(rx, ry, rz);

                other->vx -= rx * selfMass * scale;
                other->vy -= ry * selfMass * scale;
                other->vz -= rz * selfMass * scale;

                ux += rx * otherMass * scale;
                uy += ry * otherMass * scale;
                uz += rz * otherMass * scale;

                ++other;
            }

            self->x = px + ux * DT;
            self->y = py + uy * DT;
            self->z = pz + uz * DT;
            self->vx = ux;
            self->vy = uy;
            self->vz = uz;

            ++self;
        }

        // The final body is only ever the "other" body above; move it here.
        last->x += last->vx * DT;
        last->y += last->vy * DT;
        last->z += last->vz * DT;
    }
}
/// <summary>
/// Entry point: allocates <c>SIZE</c> bodies plus two scratch buffers of <c>N</c>
/// elements on the stack, initialises the bodies, prints the energy ("F9"), advances
/// the system with a step of 0.01 (step count from the first argument, default 1000)
/// and prints the energy again.
/// </summary>
/// <param name="args">Optional first element: number of steps to simulate.</param>
public static void Main(string[] args) {
    unchecked {
        NBody *ptrSun = stackalloc NBody[SIZE];
        Delta *r = stackalloc Delta[N];
        double *mag = stackalloc double[N];   // a fixed double[1000] field was tried instead

        InitBodies(ptrSun);

        // A disabled assertion in the original expected "-0.169075164" here.
        string before = Energy(ptrSun).ToString("F9");
        Console.Out.WriteLine(before);

        int steps;
        if (args.Length > 0) {
            steps = Int32.Parse(args[0]);
        } else {
            steps = 1000;
        }

        for (int done = 0; done < steps; ++done) {
            Advance(ptrSun, r, mag, 0.01d);
        }

        // A disabled assertion in the original expected "-0.169059907" here.
        string after = Energy(ptrSun).ToString("F9");
        Console.Out.WriteLine(after);
    }
}
// Initialize: sets up the component pointers and the starting state.
//  - Rounds the address in `ptr` up to the next multiple of 16 and derives XPtr, YPtr, ZPtr,
//    VxPtr, VyPtr, VzPtr and MPtr from it using the YOffset/ZOffset/VXOffset/VYOffset/VZOffset/
//    MOffset constants (declared outside this block).
//  - Writes position and velocity constants into slots 2..5 of each component array and
//    Solarmass into MPtr[0].
//  - Only when Sse3.IsSupported: multiplies the velocities in slots 2..5 by DaysPerYear, writes
//    the four mass constants multiplied by Solarmass into MPtr[2..5], accumulates mass * velocity,
//    and stores the per-axis sums divided by Solarmass into VxPtr[0], VyPtr[0] and VzPtr[0].
// NOTE(review): when Sse3 is not supported none of the velocity scaling, mass or slot-0 velocity
//   initialisation runs and there is no fallback - confirm this is intended.
// NOTE(review): the sums are divided by +Solarmass; the scalar InitBodies variants in this file
//   divide by -Solarmass - confirm the sign.
// NOTE(review): slots 0 and 1 of the X/Y/Z arrays are not written here; presumably they rely on
//   zeroed memory - verify against the caller.
// NOTE(review): aligning forward can move the data start up to 15 bytes past `ptr`; confirm the
//   NBody struct reserves room for that.
// NOTE(review): in this collapsed source the `//` comments sit mid-line, so as written they
//   comment out the code after them on the same physical line - confirm against the original
//   formatting.
public void Initialize(NBody *ptr) { // Align fixed Array to 16 bytes ulong alignOffset = (ulong)ptr % 16; ulong dataAddr = alignOffset == 0 ? (ulong)ptr : (ulong)ptr + (16 - alignOffset); double *dataPtr = (double *)(dataAddr); XPtr = dataPtr; YPtr = (double *)(dataAddr + YOffset); ZPtr = (double *)(dataAddr + ZOffset); VxPtr = (double *)(dataAddr + VXOffset); VyPtr = (double *)(dataAddr + VYOffset); VzPtr = (double *)(dataAddr + VZOffset); MPtr = (double *)(dataAddr + MOffset); XPtr[2] = 4.84143144246472090e+00; XPtr[3] = 8.34336671824457987e+00; XPtr[4] = 1.28943695621391310e+01; XPtr[5] = 1.53796971148509165e+01; YPtr[2] = -1.16032004402742839e+00; YPtr[3] = 4.12479856412430479e+00; YPtr[4] = -1.51111514016986312e+01; YPtr[5] = -2.59193146099879641e+01; ZPtr[2] = -1.03622044471123109e-01; ZPtr[3] = -4.03523417114321381e-01; ZPtr[4] = -2.23307578892655734e-01; ZPtr[5] = 1.79258772950371181e-01; VxPtr[2] = 1.66007664274403694e-03; VxPtr[3] = -2.76742510726862411e-03; VxPtr[4] = 2.96460137564761618e-03; VxPtr[5] = 2.68067772490389322e-03; VyPtr[2] = 7.69901118419740425e-03; VyPtr[3] = 4.99852801234917238e-03; VyPtr[4] = 2.37847173959480950e-03; VyPtr[5] = 1.62824170038242295e-03; VzPtr[2] = -6.90460016972063023e-05; VzPtr[3] = 2.30417297573763929e-05; VzPtr[4] = -2.96589568540237556e-05; VzPtr[5] = -9.51592254519715870e-05; MPtr[0] = Solarmass; if (Sse3.IsSupported) { Vector128 <double> vx = Sse2.SetZeroVector128 <double>(); Vector128 <double> vy = Sse2.SetZeroVector128 <double>(); Vector128 <double> vz = Sse2.SetZeroVector128 <double>(); Vector128 <double> yearInDays = Sse2.SetAllVector128(DaysPerYear); Vector128 <double> mSol = Sse2.SetAllVector128(Solarmass); double *mInitData = stackalloc double[] { 9.54791938424326609e-04, 2.85885980666130812e-04, 4.36624404335156298e-05, 5.15138902046611451e-05 }; for (int i = 2; i < 6; i += 2) { Vector128 <double> vxV = Sse2.LoadAlignedVector128(VxPtr + i); Vector128 <double> vyV = Sse2.LoadAlignedVector128(VyPtr + i); 
Vector128 <double> vzV = Sse2.LoadAlignedVector128(VzPtr + i); // Initialize v(xyz) vectors vxV = Sse2.Multiply(vxV, yearInDays); vyV = Sse2.Multiply(vyV, yearInDays); vzV = Sse2.Multiply(vzV, yearInDays); // Initialize mV vector Vector128 <double> mV = Unsafe.Read <Vector128 <double> >(mInitData + i - 2); mV = Sse2.Multiply(mV, mSol); // Try to use RM second op overload vx = Sse2.Add(vx, Sse2.Multiply(mV, vxV)); vy = Sse2.Add(vy, Sse2.Multiply(mV, vyV)); vz = Sse2.Add(vz, Sse2.Multiply(mV, vzV)); // Save mV vector Unsafe.Write(MPtr + i, mV); // Save initialized v(xyz) vectors Sse2.StoreAligned(VxPtr + i, vxV); Sse2.StoreAligned(VyPtr + i, vyV); Sse2.StoreAligned(VzPtr + i, vzV); } vx = Sse3.HorizontalAdd(vx, vy); vx = Sse2.Divide(vx, mSol); vz = Sse3.HorizontalAdd(vz, vz); vz = Sse2.Divide(vz, mSol); VxPtr[0] = Sse2.ConvertToDouble(vx); vx = Sse.StaticCast <byte, double>(Sse2.ShiftRightLogical128BitLane(Sse.StaticCast <double, byte>(vx), 8)); VyPtr[0] = Sse2.ConvertToDouble(vx); VzPtr[0] = Sse2.ConvertToDouble(vz); } }
/// <summary>
/// Entry point of the hardware-intrinsics variant: initialises the body storage, prints
/// the energy ("F9"), advances the system (iteration count from the first argument,
/// default 50000000), prints the energy again and finally the total elapsed time.
/// When <c>FEATURE_TIMING</c> is defined, the tick count of each phase is printed too.
/// </summary>
/// <remarks>
/// Changes from the previous version:
/// the per-phase timer under <c>FEATURE_TIMING</c> is now a separate variable
/// (<c>phaseWatch</c>). It used to redeclare <c>stopWatch</c>, which does not compile
/// when the symbol is defined, and restarting it would also have reset the overall
/// timer reported on the last line.
/// The first energy line is written with <c>WriteLine</c> instead of an un-awaited
/// <c>WriteLineAsync</c>, matching the second energy line.
/// </remarks>
/// <param name="args">Optional first element: number of iterations to simulate.</param>
public static void Main(string[] args) {
    // Overall timer; never restarted so the final line covers the whole run.
    var stopWatch = Stopwatch.StartNew();
#if FEATURE_TIMING
    Console.WriteLine($"Running on .NETCoreApp: {GetNetCoreVersion()}");
    long ticks = Stopwatch.GetTimestamp();
#endif
    NBody *nBodyPtr = stackalloc NBody[1];
#if FEATURE_TIMING
    var phaseWatch = Stopwatch.StartNew();
#endif
    nBodyPtr->Initialize(nBodyPtr);
#if FEATURE_TIMING
    phaseWatch.Stop();
    Console.WriteLine($"Elapsed Initialize: {phaseWatch.ElapsedTicks.ToString(format)}");
    phaseWatch.Restart();
#endif
    Console.Out.WriteLine(nBodyPtr->Energy().ToString("F9"));
#if FEATURE_TIMING
    phaseWatch.Stop();
    Console.WriteLine($"Elapsed Enenrgy origin: {phaseWatch.ElapsedTicks.ToString(format)}");
#endif
    int iterations = args.Length > 0 ? Int32.Parse(args[0]) : 50000000;
#if FEATURE_TIMING
    phaseWatch.Restart();
#endif
    while (iterations-- > 0) {
        nBodyPtr->Advance();
    }
#if FEATURE_TIMING
    phaseWatch.Stop();
    Console.WriteLine($"Elapsed Advance: {phaseWatch.ElapsedTicks.ToString(format)}");
    phaseWatch.Restart();
#endif
    Console.Out.WriteLine(nBodyPtr->Energy().ToString("F9"));
#if FEATURE_TIMING
    phaseWatch.Stop();
    Console.WriteLine($"Elapsed Enenrgy target: {phaseWatch.ElapsedTicks.ToString(format)}");
    long endTicks = Stopwatch.GetTimestamp();
    Console.WriteLine($"Elapsed total time: {(endTicks - ticks).ToString(format)}");
#endif
    stopWatch.Stop();
    Console.WriteLine($"NBodyHWIntrinsics C# Elapsed Time: {stopWatch.Elapsed}");
}
// AdvanceStatic: SSE2/SSE3 step over the component arrays reachable through nBodyPtr
// (XPtr, YPtr, ZPtr, VxPtr, VyPtr, VzPtr, MPtr), using `distance` (default 0.01) as the step.
//  - Outer loop: broadcasts one body's mass, position and velocity into both lanes of a vector;
//    the per-body pointers advance by one element per iteration.
//  - Inner loop: walks the arrays two slots at a time starting at the even index (k - k % 2),
//    computes a magnitude via Get2D / Reciprocal / ReciprocalSqrt, subtracts from the stored
//    velocities at slots j, j+1 and accumulates into the broadcast velocity vectors.
//  - After the inner loop: stores horizontally-added velocity and position results.
// NOTE(review): this routine looks unfinished; the points below should be confirmed before use.
// NOTE(review): xxV/yyV/zzV are computed with Sse2.Multiply(loaded, broadcast); the scalar Advance
//   variants in this file use a difference of positions - confirm the intended operation.
// NOTE(review): Sse2.StoreScalar(vxPtr, HorizontalAdd(vxV, vxV)) appears three times; vyV and vzV
//   are never stored - likely a copy/paste slip for vyPtr/vzPtr.
// NOTE(review): the accumulators start as a value duplicated in both lanes, so a horizontal add
//   also sums the duplicated starting value - verify the stored result.
// NOTE(review): `if (i == 0) { i++; }` advances i only; k and the per-body pointers are not
//   advanced with it - confirm the intended slot layout.
// NOTE(review): the trailing commented-out statements suggest a last-body position update was
//   considered and disabled.
// NOTE(review): in this collapsed source the `//` comments sit mid-line, so as written they
//   comment out the code after them on the same physical line - confirm against the original
//   formatting.
public static void AdvanceStatic(NBody *nBodyPtr, double distance = 0.01d) { Vector128 <double> disV = Sse2.SetAllVector128(distance); double * xPtr = nBodyPtr->XPtr, yPtr = nBodyPtr->YPtr, zPtr = nBodyPtr->ZPtr, vxPtr = nBodyPtr->VxPtr, vyPtr = nBodyPtr->VyPtr, vzPtr = nBodyPtr->VzPtr, mPtr = nBodyPtr->MPtr; double * gxPtr = nBodyPtr->XPtr, gyPtr = nBodyPtr->YPtr, gzPtr = nBodyPtr->ZPtr, gvxPtr = nBodyPtr->VxPtr, gvyPtr = nBodyPtr->VyPtr, gvzPtr = nBodyPtr->VzPtr, gmPtr = nBodyPtr->MPtr; for (int i = 0, k = 2; i < 6; i++, k++, mPtr++, vxPtr++, vyPtr++, vzPtr++, xPtr++, yPtr++, zPtr++) { var mV = Sse3.LoadAndDuplicateToVector128(mPtr); var xV = Sse3.LoadAndDuplicateToVector128(xPtr); var yV = Sse3.LoadAndDuplicateToVector128(yPtr); var zV = Sse3.LoadAndDuplicateToVector128(zPtr); var vxV = Sse3.LoadAndDuplicateToVector128(vxPtr); var vyV = Sse3.LoadAndDuplicateToVector128(vyPtr); var vzV = Sse3.LoadAndDuplicateToVector128(vzPtr); for (int j = (k - k % 2); j < 6; j += 2) { var xxV = Sse2.LoadAlignedVector128(gxPtr + j); var yyV = Sse2.LoadAlignedVector128(gyPtr + j); var zzV = Sse2.LoadAlignedVector128(gzPtr + j); xxV = Sse2.Multiply(xxV, xV); yyV = Sse2.Multiply(yyV, yV); zzV = Sse2.Multiply(zzV, zV); var mag = Get2D(xxV, yyV, zzV); mag = Sse2.Multiply(Reciprocal(mag), ReciprocalSqrt(mag)); mag = Sse2.Multiply(mag, disV); var imag = Sse2.Multiply(mV, mag); Sse2.StoreAligned(gvxPtr + j, Sse2.Subtract(Sse2.LoadAlignedVector128(gvxPtr + j), Sse2.Multiply(imag, xxV))); Sse2.StoreAligned(gvyPtr + j, Sse2.Subtract(Sse2.LoadAlignedVector128(gvyPtr + j), Sse2.Multiply(imag, yyV))); Sse2.StoreAligned(gvzPtr + j, Sse2.Subtract(Sse2.LoadAlignedVector128(gvzPtr + j), Sse2.Multiply(imag, zzV))); var jmag = Sse2.Multiply(Sse2.LoadAlignedVector128(gmPtr + j), mag); vxV = Sse2.Add(vxV, Sse2.Multiply(jmag, xxV)); vyV = Sse2.Add(vyV, Sse2.Multiply(jmag, yyV)); vzV = Sse2.Add(vzV, Sse2.Multiply(jmag, zzV)); } // inner loop Sse2.StoreScalar(vxPtr, Sse3.HorizontalAdd(vxV, vxV)); 
Sse2.StoreScalar(vxPtr, Sse3.HorizontalAdd(vxV, vxV)); Sse2.StoreScalar(vxPtr, Sse3.HorizontalAdd(vxV, vxV)); xV = Sse2.Add(xV, Sse2.Multiply(vxV, disV)); yV = Sse2.Add(yV, Sse2.Multiply(vyV, disV)); zV = Sse2.Add(zV, Sse2.Multiply(vzV, disV)); Sse2.StoreScalar(xPtr, Sse3.HorizontalAdd(xV, xV)); Sse2.StoreScalar(yPtr, Sse3.HorizontalAdd(yV, yV)); Sse2.StoreScalar(zPtr, Sse3.HorizontalAdd(zV, zV)); if (i == 0) { i++; } } // outer loop //*(xPtr - 1) += (*(vxPtr - 1) * distance); //*(yPtr - 1) += (*(vyPtr - 1) * distance); //*(zPtr - 1) += (*(vzPtr - 1) * distance); }
/// <summary>
/// Runs the whole benchmark as a single switch-based state machine: initialises the
/// bodies, prints the energy, advances the system <paramref name="advancements"/>
/// times and prints the energy again.
/// </summary>
/// <remarks>
/// A small byte "call stack" stored in the extra Int64 slot after the body data holds
/// the state to resume; <c>DerefBI</c> acts as a shared subroutine that loads body
/// <c>bi</c> into locals and then returns to the state on top of that stack.
/// Fixes relative to the previous version:
/// the energy pair loop now tests <c>bj &lt;= last</c> before reading <c>bj</c>, where
/// it used to read one element past <c>last</c> for the final body;
/// a zero or negative <paramref name="advancements"/> now skips the advance phase, where
/// it used to execute one step.
/// </remarks>
/// <param name="advancements">Number of steps to perform; non-positive values perform none.</param>
private unsafe static void Run(int advancements) {
    unchecked {
        double ix = 0d, iy = 0d, iz = 0d,
               ivx = 0d, ivy = 0d, ivz = 0d, imass = 0d,
               dx = 0d, dy = 0d, dz = 0d, jmass = 0d, mag = 0d;

        // Body storage, plus one trailing Int64 used as the byte call stack.
        Int64 *stack = stackalloc Int64[SIZE + 1];
        NBody *bi = (NBody *)stack;
        NBody *bj = (NBody *)stack;
        NBody *last = bi + COUNT - 1;
        InitBodies(bi, last);

        byte *callstack = (byte *)(stack + SIZE);
        byte stackptr = 0;
        // Pushed in reverse order of execution: Energy, then Advance, then Finished.
        callstack[stackptr++] = (byte)Method.Finished;
        callstack[stackptr++] = (byte)Method.Advance;
        callstack[stackptr++] = (byte)Method.Energy;

        RETURN:
        switch ((Method)callstack[--stackptr]) {
            case Method.Energy:
                mag = 0d; // mag doubles as the energy accumulator in this phase
                bi = (NBody *)stack;
                goto case Method.EnergyOuterLoop;

            case Method.EnergyOuterLoop:
                callstack[stackptr++] = (byte)Method.EnergyInnerLoop;
                goto case Method.DerefBI;

            case Method.EnergyInnerLoop:
                mag += 0.5 * imass * (ivx * ivx + ivy * ivy + ivz * ivz);
                goto case Method.EnergyInnerLoopBody;

            case Method.EnergyInnerLoopBody:
                // Bound is checked before bj is read, so the final body adds no pair term.
                if (bj <= last) {
                    dx = ix - bj->x;
                    dy = iy - bj->y;
                    dz = iz - bj->z;
                    mag -= imass * bj->mass / Math.Sqrt(dx * dx + dy * dy + dz * dz);
                    ++bj;
                    goto case Method.EnergyInnerLoopBody;
                }
                if (++bi <= last) {
                    goto case Method.EnergyOuterLoop;
                }
                Console.Out.WriteLine(mag.ToString("F9"));
                goto RETURN;

            case Method.Advance:
                if (advancements <= 0) {
                    // Nothing to simulate: go straight to the final energy report.
                    goto case Method.Energy;
                }
                bi = (NBody *)stack;
                goto case Method.AdvanceOuterLoop;

            case Method.AdvanceOuterLoop:
                callstack[stackptr++] = (byte)Method.AdvanceInnerLoop;
                goto case Method.DerefBI;

            case Method.AdvanceInnerLoop:
                dx = bj->x - ix;
                dy = bj->y - iy;
                dz = bj->z - iz;
                jmass = bj->mass;
                mag = GetMagnitude(dx, dy, dz);

                bj->vx = bj->vx - dx * imass * mag;
                bj->vy = bj->vy - dy * imass * mag;
                bj->vz = bj->vz - dz * imass * mag;

                ivx = ivx + dx * jmass * mag;
                ivy = ivy + dy * jmass * mag;
                ivz = ivz + dz * jmass * mag;

                if (++bj <= last) {
                    goto case Method.AdvanceInnerLoop;
                }

                bi->x = ix + ivx * DT;
                bi->y = iy + ivy * DT;
                bi->z = iz + ivz * DT;
                bi->vx = ivx;
                bi->vy = ivy;
                bi->vz = ivz;

                if (++bi < last) {
                    goto case Method.AdvanceOuterLoop;
                }

                // The last body is never the outer-loop body, so move it separately.
                last->x = last->x + last->vx * DT;
                last->y = last->y + last->vy * DT;
                last->z = last->z + last->vz * DT;

                if (--advancements > 0) {
                    goto case Method.Advance;
                }
                goto case Method.Energy;

            case Method.DerefBI:
                // Shared subroutine: cache body bi, then resume the pushed state.
                bj = bi + 1;
                ix = bi->x;
                iy = bi->y;
                iz = bi->z;
                ivx = bi->vx;
                ivy = bi->vy;
                ivz = bi->vz;
                imass = bi->mass;
                goto RETURN;

            case Method.Finished:
                break;
        }
    }
}