uint2, GPGPU-N-Body-Sim C# (CSharp)代码示例

示例#1

0

显示文件

文件： MLS_MPM_Fluid_Multithreaded.cs 项目： YoungGyuLee/UnityPractice

        /// <summary>
        /// Grid-to-particle step for particle <paramref name="i"/>: rebuilds the particle
        /// velocity and affine matrix C from the 3x3 neighbouring grid cells, advects the
        /// particle, applies the optional mouse push and the soft wall constraint, and
        /// writes the particle back to <c>ps</c>.
        /// </summary>
        /// <param name="i">Index of the particle in <c>ps</c>.</param>
        public void Execute(int i)
        {
            Particle particle = ps[i];

            // the velocity is rebuilt from the grid every step, so start from zero
            particle.v = 0;

            // quadratic interpolation weights around the cell containing the particle
            uint2  base_cell = (uint2)particle.x;
            float2 offset    = (particle.x - base_cell) - 0.5f;

            var w = stackalloc float2[3];
            w[0] = 0.5f * math.pow(0.5f - offset, 2);
            w[1] = 0.75f - math.pow(offset, 2);
            w[2] = 0.5f * math.pow(0.5f + offset, 2);

            // affine per-particle momentum matrix from APIC / MLS-MPM.
            // see APIC paper (https://web.archive.org/web/20190427165435/https://www.math.ucla.edu/~jteran/papers/JSSTS15.pdf), page 6,
            // below equation 11: C = B * (D^-1) for APIC equation 8, where B is accumulated
            // in the loop below and (D^-1) = 4 is a constant for quadratic interpolation functions
            float2x2 affine = 0;

            for (uint ix = 0; ix < 3; ++ix)
            {
                for (uint iy = 0; iy < 3; ++iy)
                {
                    uint2 node = math.uint2(base_cell.x + ix - 1, base_cell.y + iy - 1);
                    int   flat = (int)node.x * grid_res + (int)node.y;

                    float  w_xy         = w[ix].x * w[iy].y;
                    float2 node_offset  = (node - particle.x) + 0.5f;
                    float2 contribution = grid[flat].v * w_xy;

                    affine     += math.float2x2(contribution * node_offset.x, contribution * node_offset.y);
                    particle.v += contribution;
                }
            }

            particle.C = affine * 4;

            // advect, then clamp so the particle cannot leave the simulation domain
            particle.x += particle.v * dt;
            particle.x  = math.clamp(particle.x, 1, grid_res - 2);

            // mouse interaction: push particles inside the mouse radius away from the cursor
            if (mouse_down)
            {
                var  from_mouse = particle.x - mouse_pos;
                bool inside     = math.dot(from_mouse, from_mouse) < mouse_radius * mouse_radius;
                if (inside)
                {
                    particle.v += math.normalizesafe(from_mouse, 0) * 1.0f;
                }
            }

            // boundaries: correct the velocity by however far the predicted position
            // (position + velocity) would overshoot a wall
            const float wall_min  = 3;
            float       wall_max  = (float)grid_res - 4;
            float2      predicted = particle.x + particle.v;

            if (predicted.x < wall_min)
            {
                particle.v.x += wall_min - predicted.x;
            }
            if (predicted.x > wall_max)
            {
                particle.v.x += wall_max - predicted.x;
            }
            if (predicted.y < wall_min)
            {
                particle.v.y += wall_min - predicted.y;
            }
            if (predicted.y > wall_max)
            {
                particle.v.y += wall_max - predicted.y;
            }

            // no deformation gradient update here: the constitutive equation never uses it

            ps[i] = particle;
        }
    }

示例#2

0

显示文件

文件： uint8.cs 项目： csritter/MaxMath

 /// <summary>
 /// Builds a <c>uint8</c> from four <c>uint2</c> parts, given in element order.
 /// </summary>
 public uint8(uint2 x01, uint2 x23, uint2 x45, uint2 x67)
 {
     // pack the pairs into two uint4 halves, then delegate to the (uint4, uint4) constructor
     uint4 firstHalf  = new uint4(x01, x23);
     uint4 secondHalf = new uint4(x45, x67);

     this = new uint8(firstHalf, secondHalf);
 }

示例#3

0

显示文件

文件： MLS_MPM_Fluid_Multithreaded.cs 项目： YoungGyuLee/UnityPractice

        // we now have 2 P2G phases as we need to ensure we have scattered particle masses to the grid,
        // in order to get our density estimate at each frame

        /// <summary>
        /// Second particle-to-grid pass: for every particle, estimates density from the
        /// grid masses, derives pressure and viscous stress, and scatters the resulting
        /// fused force/momentum term onto the 3x3 neighbouring grid cells.
        /// </summary>
        public void Execute()
        {
            var w = stackalloc float2[3];

            for (int particle_index = 0; particle_index < num_particles; ++particle_index)
            {
                var particle = ps[particle_index];

                // quadratic interpolation weights around the cell containing the particle
                uint2  base_cell = (uint2)particle.x;
                float2 offset    = (particle.x - base_cell) - 0.5f;
                w[0] = 0.5f * math.pow(0.5f - offset, 2);
                w[1] = 0.75f - math.pow(offset, 2);
                w[2] = 0.5f * math.pow(0.5f + offset, 2);

                // estimate particle volume by summing the neighbourhood's weighted mass
                // contribution (MPM course, equation 152)
                float density = 0.0f;
                for (uint ix = 0; ix < 3; ++ix)
                {
                    for (uint iy = 0; iy < 3; ++iy)
                    {
                        float w_xy = w[ix].x * w[iy].y;
                        int   flat = (int)(base_cell.x + ix - 1) * grid_res + (int)(base_cell.y + iy - 1);
                        density += grid[flat].mass * w_xy;
                    }
                }

                float volume = particle.mass / density;

                // end goal, constitutive equation for isotropic fluid:
                // stress = -pressure * I + viscosity * (velocity_gradient + velocity_gradient_transposed)

                // Tait equation of state, clamped from below as a hack so that negative
                // pressures do not make particles absorb into each other
                float pressure = math.max(-0.1f, eos_stiffness * (math.pow(density / rest_density, eos_power) - 1));

                float2x2 stress = math.float2x2(
                    -pressure, 0,
                    0, -pressure
                    );

                // velocity gradient - CPIC eq. 17, where deriv of quadratic polynomial is linear
                float2x2 strain = particle.C;

                // both off-diagonal entries are replaced by their sum
                float off_diagonal_sum = strain.c1.x + strain.c0.y;
                strain.c1.x = off_diagonal_sum;
                strain.c0.y = off_diagonal_sum;

                stress += dynamic_viscosity * strain;

                var eq_16_term_0 = -volume * 4 * stress * dt;

                for (uint ix = 0; ix < 3; ++ix)
                {
                    for (uint iy = 0; iy < 3; ++iy)
                    {
                        float w_xy = w[ix].x * w[iy].y;

                        uint2  node        = math.uint2(base_cell.x + ix - 1, base_cell.y + iy - 1);
                        float2 node_offset = (node - particle.x) + 0.5f;
                        int    flat        = (int)node.x * grid_res + (int)node.y;

                        // fused force + momentum contribution from MLS-MPM;
                        // read-modify-write of the cell copy
                        Cell target = grid[flat];
                        target.v  += math.mul(eq_16_term_0 * w_xy, node_offset);
                        grid[flat] = target;
                    }
                }
            }
        }

示例#4

0

显示文件

文件： uint8.cs 项目： csritter/MaxMath

 /// <summary>
 /// Builds a <c>uint8</c> from a <c>uint4</c> followed by two <c>uint2</c> parts.
 /// </summary>
 public uint8(uint4 x0123, uint2 x45, uint2 x67)
 {
     // pack the two trailing pairs into a uint4, then delegate to the (uint4, uint4) constructor
     uint4 secondHalf = new uint4(x45, x67);

     this = new uint8(x0123, secondHalf);
 }

示例#5

0

显示文件

文件： uint8.cs 项目： csritter/MaxMath

 /// <summary>
 /// Builds a <c>uint8</c> from two <c>uint2</c> parts followed by a <c>uint4</c>.
 /// </summary>
 public uint8(uint2 x01, uint2 x23, uint4 x4567)
 {
     // pack the two leading pairs into a uint4, then delegate to the (uint4, uint4) constructor
     uint4 firstHalf = new uint4(x01, x23);

     this = new uint8(firstHalf, x4567);
 }

示例#6

0

显示文件

        /// <summary>
        /// Grid-to-particle step for particle <paramref name="i"/>: rebuilds the particle
        /// velocity and affine matrix C from the 3x3 neighbouring grid cells, advects the
        /// particle, applies the optional mouse push, updates the deformation gradient in
        /// <c>Fs</c> and writes the particle back to <c>ps</c>.
        /// </summary>
        /// <param name="i">Index of the particle in <c>ps</c> / <c>Fs</c>.</param>
        public void Execute(int i)
        {
            Particle p = ps[i];

            // reset particle velocity. we calculate it from scratch each step using the grid
            p.v = 0;

            // quadratic interpolation weights
            uint2  cell_idx  = (uint2)p.x;
            float2 cell_diff = (p.x - cell_idx) - 0.5f;
            var    weights   = stackalloc float2[] {
                0.5f * math.pow(0.5f - cell_diff, 2),
                0.75f - math.pow(cell_diff, 2),
                0.5f * math.pow(0.5f + cell_diff, 2)
            };

            // constructing affine per-particle momentum matrix from APIC / MLS-MPM.
            // see APIC paper (https://web.archive.org/web/20190427165435/https://www.math.ucla.edu/~jteran/papers/JSSTS15.pdf), page 6
            // below equation 11 for clarification. this is calculating C = B * (D^-1) for APIC equation 8,
            // where B is calculated in the inner loop and (D^-1) = 4 is a constant when using quadratic interpolation functions
            float2x2 B = 0;

            for (uint gx = 0; gx < 3; ++gx)
            {
                for (uint gy = 0; gy < 3; ++gy)
                {
                    float weight = weights[gx].x * weights[gy].y;

                    uint2 cell_x     = math.uint2(cell_idx.x + gx - 1, cell_idx.y + gy - 1);
                    int   cell_index = (int)cell_x.x * grid_res + (int)cell_x.y;

                    float2 dist = (cell_x - p.x) + 0.5f;
                    float2 weighted_velocity = grid[cell_index].v * weight;

                    // APIC paper equation 10, constructing inner term for B
                    var term = math.float2x2(weighted_velocity * dist.x, weighted_velocity * dist.y);

                    B += term;

                    p.v += weighted_velocity;
                }
            }
            p.C = B * 4;

            // advect particles
            p.x += p.v * dt;

            // safety clamp to ensure particles don't exit simulation domain
            p.x = math.clamp(p.x, 1, grid_res - 2);

            // mouse interaction
            if (mouse_down)
            {
                var dist = p.x - mouse_pos;
                if (math.dot(dist, dist) < mouse_radius * mouse_radius)
                {
                    float norm_factor = (math.length(dist) / mouse_radius);
                    norm_factor = math.pow(math.sqrt(norm_factor), 8);

                    // normalizesafe instead of normalize: a particle sitting exactly on the
                    // mouse position has a zero-length dist, and normalize would yield NaN,
                    // which would then contaminate p.v (and the grid on the next step)
                    var force = math.normalizesafe(dist, 0) * norm_factor * 0.5f;
                    p.v += force;
                }
            }

            // deformation gradient update - MPM course, equation 181
            // Fp' = (I + dt * p.C) * Fp
            var Fp_new = math.float2x2(
                1, 0,
                0, 1
                );

            Fp_new += dt * p.C;
            Fs[i]   = math.mul(Fp_new, Fs[i]);

            ps[i] = p;
        }
    }

示例#7

0

显示文件

 /// <summary>
 /// Combines the two components of <paramref name="result"/> into one integer as
 /// <c>x + y * 10</c>.
 /// </summary>
 /// <param name="result">Vector whose x is added to ten times its y.</param>
 /// <returns>The combined value, computed in unsigned arithmetic and then cast to <c>int</c>.</returns>
 public static int ConvertToInt(uint2 result)
 {
     uint combined = result.x + result.y * 10;

     return (int)combined;
 }

示例#8

0

显示文件

文件： MPMSingleThread.cs 项目： j20232/mpm_unity

    /// <summary>
    /// Advances the simulation by one time step (<c>m_dt</c>) in four phases:
    /// grid reset, particle-to-grid scatter, grid velocity update (gravity and
    /// boundary conditions) and grid-to-particle gather with advection.
    /// </summary>
    void Simulate()
    {
        // 1. reset scratch-pad grid
        for (int i = 0; i < m_numCells; i++)
        {
            var cell = m_grid[i];
            cell.mass = 0;
            cell.v    = 0;
            m_grid[i] = cell;
        }

        // 2. particle-to-grid
        for (int i = 0; i < m_numParticles; i++)
        {
            var p = m_particles[i];

            // Calculate quadratic kernel (see equation (123))
            uint2  cell_idx  = (uint2)p.x;
            float2 cell_diff = (p.x - cell_idx) - 0.5f;
            m_weights[0] = 0.5f * math.pow(0.5f - cell_diff, 2);
            m_weights[1] = 0.75f - math.pow(cell_diff, 2);
            m_weights[2] = 0.5f * math.pow(0.5f + cell_diff, 2);

            // 2.1 calculate weight for the 3x3 neighbouring cells surrounding the particle's position
            // on the grid using an interpolation function
            for (uint gx = 0; gx < 3; gx++)
            {
                for (uint gy = 0; gy < 3; gy++)
                {
                    float weight = m_weights[gx].x * m_weights[gy].y;

                    uint2  cell_x    = math.uint2(cell_idx.x + gx - 1, cell_idx.y + gy - 1);
                    float2 cell_dist = (cell_x - p.x) + 0.5f;
                    float2 Q         = math.mul(p.C, cell_dist);

                    // 2.2 calculate quantities like stress (see equation (172))
                    float mass_contrib = weight * p.mass;

                    // Convert 2D index to 1D
                    int  cell_index = (int)cell_x.x * m_gridResolution + (int)cell_x.y;
                    Cell cell       = m_grid[cell_index];

                    // 2.3 scatter particles' momentum to the grid
                    cell.mass         += mass_contrib;
                    cell.v            += mass_contrib * (p.v + Q); // Note: v is momentum
                    m_grid[cell_index] = cell;
                }
            }
        }

        // 3. calculate grid velocities
        for (int i = 0; i < m_numCells; i++)
        {
            var cell = m_grid[i];
            if (cell.mass > 0)
            {
                // convert momentum to velocity
                cell.v /= cell.mass;

                // apply gravity
                cell.v += m_dt * math.float2(0, m_gravity);

                // boundary conditions: zero the velocity component normal to a wall
                // for cells in the two outermost rows/columns
                int x = i / m_gridResolution;
                int y = i % m_gridResolution;
                if (x < 2 || x > m_gridResolution - 3)
                {
                    cell.v.x = 0;
                }
                if (y < 2 || y > m_gridResolution - 3)
                {
                    cell.v.y = 0;
                }
            }
            m_grid[i] = cell;
        }

        // 4. grid-to-particle
        for (int i = 0; i < m_numParticles; i++)
        {
            var p = m_particles[i];

            // reset particle velocity
            p.v = 0;

            // quadratic interpolation weights
            uint2  cell_idx  = (uint2)p.x;
            float2 cell_diff = (p.x - cell_idx) - 0.5f;
            m_weights[0] = 0.5f * math.pow(0.5f - cell_diff, 2);
            m_weights[1] = 0.75f - math.pow(cell_diff, 2);
            m_weights[2] = 0.5f * math.pow(0.5f + cell_diff, 2);

            // construct affine per-particle momentum matrix from APIC
            // below equation 11 for clarification. this is calculating C = B * (D^-1) for APIC equation 8,
            // where B is calculated in the inner loop and (D^-1) = 4 is a constant when using quadratic interpolation functions
            float2x2 B = 0;
            for (uint gx = 0; gx < 3; gx++)
            {
                for (uint gy = 0; gy < 3; gy++)
                {
                    float weight = m_weights[gx].x * m_weights[gy].y;

                    uint2 cell_x     = math.uint2(cell_idx.x + gx - 1, cell_idx.y + gy - 1);
                    int   cell_index = (int)cell_x.x * m_gridResolution + (int)cell_x.y;

                    float2 dist = (cell_x - p.x) + 0.5f;
                    float2 weighted_velocity = m_grid[cell_index].v * weight;

                    // APIC paper's equation (10)
                    var term = math.float2x2(weighted_velocity * dist.x, weighted_velocity * dist.y);

                    // calculate new particle velocities
                    B   += term;
                    p.v += weighted_velocity;
                }
            }
            p.C = B * 4;

            // advect particle positions, then clamp them inside the simulation domain
            p.x += p.v * m_dt;
            p.x  = math.clamp(p.x, 1, m_gridResolution - 2);

            // mouse interaction: push particles inside the mouse radius away from the cursor
            if (m_mouseDown)
            {
                var dist = p.x - m_mousePos;
                if (math.dot(dist, dist) < m_mouseRadius * m_mouseRadius)
                {
                    float norm_factor = (math.length(dist) / m_mouseRadius);
                    norm_factor = math.pow(math.sqrt(norm_factor), 8);

                    // normalizesafe instead of normalize: a particle sitting exactly on the
                    // mouse position has a zero-length dist, and normalize would yield NaN,
                    // which would then spread through the grid on the next step
                    var force = math.normalizesafe(dist, 0) * norm_factor * 0.5f;
                    p.v += force;
                }
            }
            m_particles[i] = p;
        }
    }

示例#9

0

显示文件

 /// <summary>
 /// Creates an instance whose <c>Value</c> is <paramref name="value"/>.
 /// </summary>
 public DeadMan(uint2 value)
 {
     Value = value;
 }

示例#10

0

显示文件

 /// <summary>
 /// Factory shorthand for <c>new fp2(v)</c>.
 /// </summary>
 /// <param name="v">Unsigned integer vector to convert.</param>
 /// <returns>The <c>fp2</c> constructed from <paramref name="v"/>.</returns>
 public static fp2 fp2(uint2 v) => new fp2(v);

示例#11

0

显示文件

 /// <summary>
 /// Constructs an <c>fp2</c> by casting each component of <paramref name="v"/> to <c>fp</c>.
 /// </summary>
 /// <param name="v">Unsigned integer vector supplying the x and y components.</param>
 public fp2(uint2 v)
 {
     x = (fp)v.x;
     y = (fp)v.y;
 }

示例#12

0

显示文件

        /// <summary>
        /// Component-wise greatest common divisor of two <c>uint2</c> vectors.
        /// Uses an SSE2 path that processes both 32-bit lanes together when available,
        /// otherwise falls back to the scalar <c>gcd</c> overload per component.
        /// </summary>
        /// <param name="x">First operand.</param>
        /// <param name="y">Second operand.</param>
        /// <returns>A vector holding the per-component result.</returns>
        public static uint2 gcd(uint2 x, uint2 y)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 ZERO = default(v128);

                // reinterpret the uint2 arguments as 128-bit SIMD registers
                v128 _x = *(v128 *)&x;
                v128 _y = *(v128 *)&y;

                v128 result             = ZERO;
                v128 result_if_zero_any = ZERO;

                // per-lane masks marking lanes where an operand is zero
                v128 x_is_zero = Sse2.cmpeq_epi32(_x, ZERO);
                v128 y_is_zero = Sse2.cmpeq_epi32(_y, ZERO);
                v128 any_zero  = Sse2.or_si128(x_is_zero, y_is_zero);

                // NOTE(review): presumably BlendV picks the second operand where the mask is set,
                // so a lane with a zero operand ends up holding the other operand - confirm against Mask.BlendV
                result_if_zero_any = Mask.BlendV(result_if_zero_any, _y, x_is_zero);
                result_if_zero_any = Mask.BlendV(result_if_zero_any, _x, y_is_zero);

                // lanes with a zero operand are treated as finished before the loop starts
                v128 doneMask = any_zero;

                // trailing-zero count of (x | y); applied as a left shift to the result after the loop
                int2 shift = math.tzcnt(x | y);

                // shift x right by its own trailing-zero count
                x  = shrl(x, math.tzcnt(x));
                _x = *(v128 *)&x;

                // the loop has the shape of the binary (Stein) GCD iteration, run on both lanes at once
                do
                {
                    // shift y right by its own trailing-zero count
                    uint2 temp_y = shrl(*(uint2 *)&_y, math.tzcnt(*(uint2 *)&_y));
                    _y = *(v128 *)&temp_y;

                    // order each lane so that _x holds the smaller and _y the larger value
                    if (Sse4_1.IsSse41Supported)
                    {
                        v128 tempX = _x;

                        _x = Sse4_1.min_epu32(_x, _y);
                        _y = Sse4_1.max_epu32(_y, tempX);
                    }
                    else
                    {
                        // no SSE4.1 unsigned min/max available: compare and blend instead
                        v128 tempX       = _x;
                        v128 x_greater_y = Operator.greater_mask_uint(_x, _y);

                        _x = Mask.BlendV(_x, _y, x_greater_y);
                        _y = Mask.BlendV(_y, tempX, x_greater_y);
                    }

                    _y = Sse2.sub_epi32(_y, _x);

                    // lanes whose difference just reached zero (and were not already done)
                    // capture _x as their result and are marked done
                    v128 loopCheck = Sse2.andnot_si128(doneMask, Sse2.cmpeq_epi32(_y, ZERO));
                    result   = Mask.BlendV(result, _x, loopCheck);
                    doneMask = Sse2.or_si128(doneMask, loopCheck);
                } while (doneMask.SLong0 != -1); // repeat until the low 64 bits of doneMask (both uint lanes) are all ones

                // shift the result left by the count computed before the loop
                uint2 result_temp = shl(*(uint2 *)&result, shift);
                result = *(v128 *)&result_temp;

                // lanes that had a zero operand take the precomputed special-case value instead
                result = Mask.BlendV(result, result_if_zero_any, any_zero);

                return(*(uint2 *)&result);
            }
            else
            {
                // scalar fallback: one gcd call per component
                return(new uint2(gcd(x.x, y.x), gcd(x.y, y.y)));
            }
        }

示例#13

0

显示文件

文件： MarchingCubesBurst.cs 项目： ajbetteridge/MarchingCubesBurst

        /// <summary>
        /// Extracts the iso-surface at <paramref name="isoValue"/> and stores the result in
        /// <c>curVertices</c>, <c>curNormals</c> and <c>curTriangles</c>. Any previous result
        /// is disposed first. The index buffer holds every triangle twice, the second copy
        /// with reversed winding, so both faces are present.
        /// </summary>
        /// <remarks>
        /// When the surface is empty a warning is logged and the result arrays are left
        /// disposed (not re-created). The temporary job buffers are released on every exit path.
        /// </remarks>
        /// <param name="isoValue">Threshold value passed to the counting and marching-cubes jobs.</param>
        public void computeIsoSurface(float isoValue)
        {
            // release the previous result, if any
            if (curVertices.IsCreated)
            {
                curVertices.Dispose();
            }
            if (curNormals.IsCreated)
            {
                curNormals.Dispose();
            }
            if (curTriangles.IsCreated)
            {
                curTriangles.Dispose();
            }

            // temporary buffers shared by the jobs below
            NativeArray<uint2> vertPerCellIn  = new NativeArray<uint2>(totalSize, Allocator.TempJob);
            NativeArray<uint2> vertPerCell    = new NativeArray<uint2>(totalSize, Allocator.TempJob);
            NativeArray<uint>  compactedVoxel = new NativeArray<uint>(totalSize, Allocator.TempJob);

            // try/finally guarantees the TempJob buffers are disposed on every exit path,
            // including the empty-surface early return (which previously leaked vertPerCellIn)
            try
            {
                //CountVertexPerVoxelJob
                var countVJob = new CountVertexPerVoxelJob()
                {
                    densV       = values,
                    nbTriTable  = nbTriTable,
                    triTable    = triTable,
                    vertPerCell = vertPerCellIn,
                    gridSize    = gridSize,
                    totalVoxel  = totalSize,
                    isoValue    = isoValue
                };

                var countVJobHandle = countVJob.Schedule(totalSize, 128);

                countVJobHandle.Complete();

                //exclusivescan => compute the total number of vertices
                uint2 lastElem = vertPerCellIn[totalSize - 1];

                var escanJob = new ExclusiveScanTrivialJob()
                {
                    vertPerCell = vertPerCellIn,
                    result      = vertPerCell,
                    totalVoxel  = totalSize
                };

                var escanJobJobHandle = escanJob.Schedule();

                escanJobJobHandle.Complete();

                // an exclusive scan leaves out the last input element, so add it back for the totals
                uint2 lastScanElem = vertPerCell[totalSize - 1];

                uint newTotalVoxels = lastElem.y + lastScanElem.y;
                uint totalVerts     = lastElem.x + lastScanElem.x;

                if (totalVerts == 0)
                {
                    Debug.LogWarning("Empty iso-surface");
                    return;
                }

                int vertCount = (int)totalVerts;

                curVertices = new NativeArray<float3>(vertCount, Allocator.Persistent);
                curNormals  = new NativeArray<float3>(vertCount, Allocator.Persistent);
                //Double the triangles to have both faces
                curTriangles = new NativeArray<int>(vertCount * 2, Allocator.Persistent);

                //compactvoxels
                var compactJob = new CompactVoxelJob()
                {
                    vertPerCell = vertPerCell,
                    compVoxel   = compactedVoxel,
                    gridSize    = gridSize,
                    totalVoxel  = totalSize,
                    lastElem    = lastElem.y
                };

                var compactJobHandle = compactJob.Schedule(totalSize, 128);

                compactJobHandle.Complete();

                //MC
                var MCJob = new MarchingCubesJob()
                {
                    vertices    = curVertices,
                    compVoxel   = compactedVoxel,
                    vertPerCell = vertPerCell,
                    densV       = values,
                    nbTriTable  = nbTriTable,
                    triTable    = triTable,
                    oriGrid     = originGrid,
                    dx          = dx,
                    gridSize    = gridSize,
                    isoValue    = isoValue,
                    totalVerts  = totalVerts
                };
                var MCJobHandle = MCJob.Schedule((int)newTotalVoxels, 128);

                MCJobHandle.Complete();

                //Normals
                var NormJob = new ComputeNormalsJob()
                {
                    normals  = curNormals,
                    vertices = curVertices,
                    densV    = values,
                    oriGrid  = originGrid,
                    dx       = dx,
                    gridSize = gridSize
                };
                var NormJobHandle = NormJob.Schedule(vertCount, 128);

                NormJobHandle.Complete();

                // Build the index buffer one complete triangle at a time. The bound is
                // "i + 2 < vertCount" in signed arithmetic: the previous "i < totalVerts - 3"
                // skipped the last triangle and underflowed the uint when totalVerts < 3.
                for (int i = 0; i + 2 < vertCount; i += 3)
                {
                    // front face
                    curTriangles[i]     = i;
                    curTriangles[i + 1] = i + 1;
                    curTriangles[i + 2] = i + 2;

                    // back face: same vertices, second and third index swapped to invert the winding
                    int back = vertCount + i;
                    curTriangles[back]     = i;
                    curTriangles[back + 2] = i + 1;
                    curTriangles[back + 1] = i + 2;
                }
            }
            finally
            {
                vertPerCellIn.Dispose();
                vertPerCell.Dispose();
                compactedVoxel.Dispose();
            }
        }

示例#14

0

显示文件

 /// <summary>
 /// Allocates unmanaged storage for a <c>size.x</c> by <c>size.y</c> array of
 /// <typeparamref name="T"/> using the given allocator, with 16-byte alignment.
 /// </summary>
 /// <param name="size">Dimensions of the array; stored in <c>Length</c>.</param>
 /// <param name="alloc">Allocator used for the allocation; stored in <c>alloc</c>.</param>
 public Native2DArray(uint2 size, Allocator alloc)
 {
     this.alloc = alloc;
     Length     = size;

     // widen to 64 bits BEFORE multiplying: uint * uint is evaluated in 32 bits and
     // would silently wrap for large dimensions, allocating a buffer that is too small
     long byteCount = (long)size.x * size.y * sizeof(T);

     ptr = (T *)UnsafeUtility.Malloc(byteCount, 16, alloc);
 }

示例#15

0

显示文件

文件： Vector Boolean Conversion.cs 项目： csritter/MaxMath

        /// <summary>
        /// Converts each component of <paramref name="x"/> to a boolean after clamping it to
        /// the range [0, 1], so the reinterpreted bytes are only ever 0 or 1.
        /// </summary>
        /// <param name="x">Vector to convert.</param>
        /// <returns>The clamped components reinterpreted as a <c>bool2</c>.</returns>
        public static bool2 toboolsafe(uint2 x)
        {
            byte2 zeroOrOne = (byte2)math.clamp(x, 0, 1);

            // reinterpret the two clamped bytes as two bools
            bool2 *asBools = (bool2 *)&zeroOrOne;

            return *asBools;
        }

示例#16

0

显示文件

文件： TestUtils.cs 项目： 0geova0/Jam

 // uint
 /// <summary>
 /// Compares two <c>uint2</c> values component by component by forwarding each
 /// component pair to the scalar <c>AreEqual</c> overload, x first and then y.
 /// </summary>
 public static void AreEqual(uint2 a, uint2 b)
 {
     uint firstX = a.x, secondX = b.x;
     AreEqual(firstX, secondX);

     uint firstY = a.y, secondY = b.y;
     AreEqual(firstY, secondY);
 }

示例#17

0

显示文件

文件： DriverAPI.cs 项目： lvaleriu/managedCuda

 /// <summary>
 /// Externally implemented entry point <c>cuMemcpyDtoH_v2</c>, overload taking a single
 /// <c>uint2</c> by reference as the host destination.
 /// NOTE(review): presumably copies <paramref name="ByteCount"/> bytes from device memory
 /// at <paramref name="srcDevice"/> into <paramref name="dstHost"/> - confirm against the
 /// CUDA driver API documentation; the import attribute is not visible here.
 /// </summary>
 /// <param name="dstHost">Host-side destination, passed by reference.</param>
 /// <param name="srcDevice">Device pointer used as the source.</param>
 /// <param name="ByteCount">Size argument forwarded to the native call.</param>
 /// <returns>The <c>CUResult</c> returned by the native call.</returns>
 public static extern CUResult cuMemcpyDtoH_v2(ref uint2 dstHost, CUdeviceptr srcDevice, SizeT ByteCount);

示例#18

0

显示文件

        /// <summary>
        /// Particle-to-grid pass for the elastic model: for every particle, computes the
        /// Neo-Hookean stress from its deformation gradient and scatters mass, APIC
        /// momentum and the fused MLS-MPM force term onto the 3x3 neighbouring grid cells.
        /// </summary>
        public void Execute()
        {
            var w = stackalloc float2[3];

            for (int particle_index = 0; particle_index < num_particles; ++particle_index)
            {
                var particle = ps[particle_index];

                // deformation gradient and its determinant
                var F = Fs[particle_index];
                var J = math.determinant(F);

                // MPM course, page 46
                var volume = particle.volume_0 * J;

                // useful matrices for Neo-Hookean model
                var F_T     = math.transpose(F);
                var F_inv_T = math.inverse(F_T);

                // MPM course equation 48
                var P = elastic_mu * (F - F_inv_T) + elastic_lambda * math.log(J) * F_inv_T;

                // cauchy_stress = (1 / det(F)) * P * F_T
                // equation 38, MPM course
                float2x2 stress = (1.0f / J) * math.mul(P, F_T);

                // (M_p)^-1 = 4, see APIC paper and MPM course page 42.
                // this term is used in MLS-MPM paper eq. 16. with quadratic weights, Mp = (1/4) * (delta_x)^2.
                // in this simulation delta_x = 1, because the rendering of the domain is scaled rather than the domain itself.
                // the multiplication by dt is part of fusing the momentum and force update for MLS-MPM
                var eq_16_term_0 = -volume * 4 * stress * dt;

                // quadratic interpolation weights
                uint2  base_cell = (uint2)particle.x;
                float2 offset    = (particle.x - base_cell) - 0.5f;
                w[0] = 0.5f * math.pow(0.5f - offset, 2);
                w[1] = 0.75f - math.pow(offset, 2);
                w[2] = 0.5f * math.pow(0.5f + offset, 2);

                // for all surrounding 9 cells
                for (uint ix = 0; ix < 3; ++ix)
                {
                    for (uint iy = 0; iy < 3; ++iy)
                    {
                        float w_xy = w[ix].x * w[iy].y;

                        uint2  node        = math.uint2(base_cell.x + ix - 1, base_cell.y + iy - 1);
                        float2 node_offset = (node - particle.x) + 0.5f;
                        float2 Q           = math.mul(particle.C, node_offset);

                        // scatter mass and momentum to the grid (read-modify-write of the cell copy)
                        int  flat   = (int)node.x * grid_res + (int)node.y;
                        Cell target = grid[flat];

                        // MPM course, equation 172
                        float weighted_mass = w_xy * particle.mass;
                        target.mass += weighted_mass;

                        // APIC P2G momentum contribution
                        target.v += weighted_mass * (particle.v + Q);

                        // fused force/momentum update from MLS-MPM
                        // see MLS-MPM paper, equation listed after eqn. 28
                        target.v += math.mul(eq_16_term_0 * w_xy, node_offset);

                        // total update on the cell's v is now:
                        // weight * (dt * M^-1 * p.volume * p.stress + p.mass * p.C)
                        // this is the fused momentum + force from MLS-MPM. however, instead of deriving the
                        // stress from the energy density, the weak form with cauchy stress is used. converted:
                        // p.volume_0 * (dΨ/dF)(Fp)*(Fp_transposed)
                        // is equal to p.volume * σ

                        // note: at this point the cell's "v" holds MOMENTUM, not velocity!
                        // it is converted in the UpdateGrid step.

                        grid[flat] = target;
                    }
                }
            }
        }

C# (CSharp) uint2, GPGPU-N-Body-Sim示例