/// <summary>
/// Draws one random bounce direction per lane and orients it towards the side of the
/// surface that the lane's normal (<c>ray.Nx4/Ny4/Nz4</c>) points to.
/// </summary>
/// <param name="rng">Random source; two <c>RandomFloat4</c> draws are taken from it.</param>
/// <param name="ray">Ray packet; only its normal components are read.</param>
/// <param name="Rx4">Receives the x components of the chosen directions.</param>
/// <param name="Ry4">Receives the y components of the chosen directions.</param>
/// <param name="Rz4">Receives the z components of the chosen directions.</param>
public static void DiffuseReflection4(Random rng, Ray4 ray, ref Vector<float> Rx4, ref Vector<float> Ry4, ref Vector<float> Rz4)
{
    // first draw becomes the z component, second draw becomes the angle around the z axis
    Vector<float> height = RandomFloat4(rng);
    Vector<float> turn = RandomFloat4(rng);

    // radius in the xy plane chosen so that (x, y, z) has length 1
    // (presumably Cosine4/Sine4 are lane-wise cos/sin - confirm in their definitions)
    Vector<float> radius = Vector.SquareRoot(Vector<float>.One - height * height);
    Vector<float> twoPi = 2 * new Vector<float>(PI);
    Vector<float> angle = twoPi * turn;

    Vector<float> dirX = Cosine4(angle) * radius;
    Vector<float> dirY = Sine4(angle) * radius;
    Vector<float> dirZ = height;

    // lanes whose direction points away from the normal are mirrored through the origin
    Vector<float> facing = ray.Nx4 * dirX + ray.Ny4 * dirY + ray.Nz4 * dirZ;
    Vector<int> pointsAway = Vector.LessThan(facing, Vector<float>.Zero);

    Rx4 = Vector.ConditionalSelect(pointsAway, -dirX, dirX);
    Ry4 = Vector.ConditionalSelect(pointsAway, -dirY, dirY);
    Rz4 = Vector.ConditionalSelect(pointsAway, -dirZ, dirZ);
}
    /// <summary>
    /// Builds a packet of primary rays for a block of pixels that is two rows high and
    /// <c>Game.SIMDLENGTH / 2</c> columns wide, with its top-left pixel at (<paramref name="x"/>, <paramref name="y"/>).
    /// </summary>
    /// <param name="rng">Random source; four <c>RandomFloat4</c> draws are taken (two for the
    /// sub-pixel offset, two for the position on the lens).</param>
    /// <param name="x">Pixel column of the first lane.</param>
    /// <param name="y">Pixel row of the first half of the lanes; the second half uses <c>y + 1</c>.</param>
    /// <returns>A new <c>Ray4</c> with origin, normalised direction and <c>t4 = 1e34</c> set.</returns>
    public Ray4 Generate4(Random rng, int x, int y)
    {
        int laneCount = Game.SIMDLENGTH;
        int half = laneCount / 2;

        // lanes [0, half) cover row y, lanes [half, 2 * half) cover row y + 1, same columns
        float[] pixelX = new float[laneCount];
        float[] pixelY = new float[laneCount];
        for (int lane = 0; lane < 2 * half; lane++)
        {
            bool secondRow = lane >= half;
            pixelX[lane] = x + (secondRow ? lane - half : lane);
            pixelY[lane] = secondRow ? y + 1 : y;
        }
        Vector<float> x4 = new Vector<float>(pixelX);
        Vector<float> y4 = new Vector<float>(pixelY);

        // the order of the four draws is significant for reproducibility
        Vector<float> centre = new Vector<float>(0.5f);
        Vector<float> jitterU = RTTools.RandomFloat4(rng);
        Vector<float> jitterV = RTTools.RandomFloat4(rng);
        Vector<float> lensU = RTTools.RandomFloat4(rng) - centre;
        Vector<float> lensV = RTTools.RandomFloat4(rng) - centre;

        // sub-pixel target on the screen plane spanned by p1, p2 and p3
        Vector<float> u = (x4 + jitterU) / new Vector<float>(screenWidth);
        Vector<float> v = (y4 + jitterV) / new Vector<float>(screenHeight);
        Vector<float> Tx4 = new Vector<float>(p1.X)
            + u * new Vector<float>(p2.X - p1.X)
            + v * new Vector<float>(p3.X - p1.X);
        Vector<float> Ty4 = new Vector<float>(p1.Y)
            + u * new Vector<float>(p2.Y - p1.Y)
            + v * new Vector<float>(p3.Y - p1.Y);
        Vector<float> Tz4 = new Vector<float>(p1.Z)
            + u * new Vector<float>(p2.Z - p1.Z)
            + v * new Vector<float>(p3.Z - p1.Z);

        // ray origin: camera position displaced along right/up, scaled by lensSize
        Vector<float> lens = new Vector<float>(lensSize);
        Vector<float> Px4 = new Vector<float>(pos.X) + lens * (lensU * new Vector<float>(right.X) + lensV * new Vector<float>(up.X));
        Vector<float> Py4 = new Vector<float>(pos.Y) + lens * (lensU * new Vector<float>(right.Y) + lensV * new Vector<float>(up.Y));
        Vector<float> Pz4 = new Vector<float>(pos.Z) + lens * (lensU * new Vector<float>(right.Z) + lensV * new Vector<float>(up.Z));

        // unit direction from the lens sample to the screen target
        Vector<float> Dx4 = Tx4 - Px4;
        Vector<float> Dy4 = Ty4 - Py4;
        Vector<float> Dz4 = Tz4 - Pz4;
        Vector<float> norm = Vector.SquareRoot(Dx4 * Dx4 + Dy4 * Dy4 + Dz4 * Dz4);
        Dx4 /= norm;
        Dy4 /= norm;
        Dz4 /= norm;

        return new Ray4
        {
            Ox4 = Px4,
            Oy4 = Py4,
            Oz4 = Pz4,
            Dx4 = Dx4,
            Dy4 = Dy4,
            Dz4 = Dz4,
            t4 = new Vector<float>(1e34f)
        };
    }
 /// <summary>
 /// Tests the ray packet against every scene object in a fixed order: the two planes
 /// (object ids 0 and 1), the six entries of <c>sphere</c> (ids 2 to 7) and the light (id 8).
 /// </summary>
 /// <remarks>
 /// All objects, including <c>plane1</c> and <c>plane2</c>, go through <c>IntersectSphere4</c>,
 /// which updates the hit data stored in <paramref name="ray"/>.
 /// </remarks>
 /// <param name="ray">Ray packet to intersect; passed on to each per-object test.</param>
 public static void Intersect4(Ray4 ray)
 {
     IntersectSphere4(0, plane1, ray);
     IntersectSphere4(1, plane2, ray);

     // object ids 2..7 map onto sphere[0..5]
     for (int objectId = 2; objectId < 8; objectId++)
     {
         IntersectSphere4(objectId, sphere[objectId - 2], ray);
     }

     IntersectSphere4(8, light, ray);
 }
        /// <summary>
        /// Intersects a packet of rays with one sphere and, for every lane where the sphere is
        /// hit closer than the lane's current <c>t4</c>, stores the new distance, the object id
        /// and the unit surface normal in <paramref name="ray"/>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// <c>sphere.r</c> is compared directly with a squared distance, so it is presumably the
        /// squared radius - confirm against the <c>Sphere</c> definition.
        /// </para>
        /// <para>
        /// Only the freshly computed normal is normalised. Lanes that do not accept this hit keep
        /// their stored normal untouched; normalising the stored normal of every lane on every
        /// call would divide a zero-length normal by zero (NaN) in lanes that have not hit
        /// anything yet, and would repeat the work for every object tested.
        /// </para>
        /// </remarks>
        /// <param name="idx">Object id written to <c>objIdx4</c> for lanes that accept the hit.</param>
        /// <param name="sphere">Sphere to test; <c>pos</c> and <c>r</c> are read.</param>
        /// <param name="ray">Ray packet; origin, direction and <c>t4</c> are read, and
        /// <c>Nx4/Ny4/Nz4</c>, <c>objIdx4</c> and <c>t4</c> are updated per lane.</param>
        private static void IntersectSphere4(int idx, Sphere sphere, Ray4 ray)
        {
            Vector<float> zero = Vector<float>.Zero;
            Vector<float> Cx4 = new Vector<float>(sphere.pos.X);
            Vector<float> Cy4 = new Vector<float>(sphere.pos.Y);
            Vector<float> Cz4 = new Vector<float>(sphere.pos.Z);
            Vector<float> r4 = new Vector<float>(sphere.r);

            // vector from the ray origin to the sphere centre
            Vector<float> Lx4 = Cx4 - ray.Ox4;
            Vector<float> Ly4 = Cy4 - ray.Oy4;
            Vector<float> Lz4 = Cz4 - ray.Oz4;

            // distance along the ray to the point closest to the centre
            Vector<float> tca4 = Lx4 * ray.Dx4 + Ly4 * ray.Dy4 + Lz4 * ray.Dz4;
            Vector<float> LdotL = Lx4 * Lx4 + Ly4 * Ly4 + Lz4 * Lz4;
            Vector<float> d2 = LdotL - tca4 * tca4;

            // lanes that miss outright: centre behind the origin, or ray passes too far away
            Vector<int> missMask = Vector.BitwiseOr(
                Vector.LessThan(tca4, zero),
                Vector.GreaterThan(d2, r4));

            // NaN in lanes where d2 > r; those lanes are already covered by missMask
            Vector<float> thc4 = Vector.SquareRoot(r4 - d2);
            Vector<float> t0 = tca4 - thc4;
            Vector<float> t1 = tca4 + thc4;

            // t0 > 0: the near intersection is the candidate; otherwise the far one is
            Vector<int> useNearMask = Vector.GreaterThan(t0, zero);
            Vector<int> nearRejected = Vector.GreaterThan(t0, ray.t4);
            Vector<int> farRejected = Vector.BitwiseOr(
                Vector.GreaterThan(t1, ray.t4),
                Vector.LessThan(t1, zero));

            // a lane keeps its previous hit data when it misses or its candidate is rejected
            Vector<int> keepMask = Vector.BitwiseOr(
                missMask,
                Vector.ConditionalSelect(useNearMask, nearRejected, farRejected));

            // candidate normal: outward for the near hit, inward (flipped) for the far hit
            Vector<float> Nx4 = Vector.ConditionalSelect(useNearMask,
                ray.Ox4 + t0 * ray.Dx4 - Cx4,
                Cx4 - (ray.Ox4 + t1 * ray.Dx4));
            Vector<float> Ny4 = Vector.ConditionalSelect(useNearMask,
                ray.Oy4 + t0 * ray.Dy4 - Cy4,
                Cy4 - (ray.Oy4 + t1 * ray.Dy4));
            Vector<float> Nz4 = Vector.ConditionalSelect(useNearMask,
                ray.Oz4 + t0 * ray.Dz4 - Cz4,
                Cz4 - (ray.Oz4 + t1 * ray.Dz4));

            // normalise the candidate only; garbage in rejected lanes is discarded by the selects below
            Vector<float> length = Vector.SquareRoot(Nx4 * Nx4 + Ny4 * Ny4 + Nz4 * Nz4);
            Nx4 /= length;
            Ny4 /= length;
            Nz4 /= length;

            ray.Nx4 = Vector.ConditionalSelect(keepMask, ray.Nx4, Nx4);
            ray.Ny4 = Vector.ConditionalSelect(keepMask, ray.Ny4, Ny4);
            ray.Nz4 = Vector.ConditionalSelect(keepMask, ray.Nz4, Nz4);

            ray.objIdx4 = Vector.ConditionalSelect(keepMask, ray.objIdx4, new Vector<int>(idx));

            ray.t4 = Vector.ConditionalSelect(keepMask,
                ray.t4,
                Vector.ConditionalSelect(useNearMask, t0, t1));
        }
 /// <summary>
 /// Looks up the skydome value for the direction of every lane in <paramref name="ray"/>.
 /// </summary>
 /// <remarks>
 /// The direction is mapped to a column in [0, 2500) and a row in [0, 1250) of <c>skybox</c>,
 /// which is read as three consecutive values per texel. Column and row are clamped before
 /// the lookup: the mapping reaches 2500 / 1250 exactly at the ends of its range, and lanes
 /// whose direction is NaN convert to an unspecified integer, either of which would otherwise
 /// index past the row or outside the array.
 /// NOTE(review): assumes <c>skybox</c> holds at least 2500 x 1250 x 3 values, as the scale
 /// factors imply - confirm where it is loaded.
 /// </remarks>
 /// <param name="ray">Ray packet; only the direction components are read.</param>
 /// <param name="x4">Receives the first stored component of each lane's texel.</param>
 /// <param name="y4">Receives the second stored component of each lane's texel.</param>
 /// <param name="z4">Receives the third stored component of each lane's texel.</param>
 public void SampleSkydome4(Ray4 ray, ref Vector<float> x4, ref Vector<float> y4, ref Vector<float> z4)
 {
     const int skyWidth = 2500;
     const int skyHeight = 1250;

     Vector<float> u = new Vector<float>(2500.0f * 0.5f) * (Vector<float>.One + RTTools.Atan2(ray.Dx4, Vector.Negate(ray.Dz4)) * new Vector<float>(INVPI));
     Vector<float> v = new Vector<float>(1250.0f * INVPI) * RTTools.Acos(ray.Dy4);
     float[] xs = new float[Game.SIMDLENGTH], ys = new float[Game.SIMDLENGTH], zs = new float[Game.SIMDLENGTH];
     for (int i = 0; i < Game.SIMDLENGTH; i++)
     {
         // clamp after the cast so NaN / out-of-range lanes still land on a valid texel
         int column = Math.Min(Math.Max((int)u[i], 0), skyWidth - 1);
         int row = Math.Min(Math.Max((int)v[i], 0), skyHeight - 1);
         int idx = column + row * skyWidth;
         xs[i] = skybox[idx * 3 + 0];
         ys[i] = skybox[idx * 3 + 1];
         zs[i] = skybox[idx * 3 + 2];
     }
     x4 = new Vector<float>(xs); y4 = new Vector<float>(ys); z4 = new Vector<float>(zs);
 }
    /// <summary>
    /// Chooses, per lane, between a refracted and a mirror-reflected direction for a ray
    /// hitting a surface with a hard-coded refractive index of 1.2.
    /// </summary>
    /// <remarks>
    /// Every lane starts with the mirror reflection of <c>ray.D</c> about <c>ray.N</c>. A lane
    /// switches to the refracted direction only when <c>cos2t</c> is non-negative and its
    /// random draw is below the transmission term <c>Tr</c>. One <c>RandomFloat4</c> draw is
    /// always taken, regardless of the masks.
    /// </remarks>
    /// <param name="rng">Random source for the reflect/refract decision.</param>
    /// <param name="ray">Ray packet; <c>inside4</c>, direction and normal are read.</param>
    /// <param name="Rx4">Receives the x components of the outgoing directions.</param>
    /// <param name="Ry4">Receives the y components of the outgoing directions.</param>
    /// <param name="Rz4">Receives the z components of the outgoing directions.</param>
    public static void Refraction4(Random rng, Ray4 ray, ref Vector<float> Rx4, ref Vector<float> Ry4, ref Vector<float> Rz4)
    {
        Vector<float> one = Vector<float>.One;
        Vector<float> mediumIndex = new Vector<float>(1.2f);

        // the two indices swap depending on whether the lane is flagged as inside
        Vector<int> isInside = Vector.Equals(ray.inside4, Vector<int>.One);
        Vector<float> nc = Vector.ConditionalSelect(isInside, one, mediumIndex);
        Vector<float> nt = Vector.ConditionalSelect(isInside, mediumIndex, one);
        Vector<float> ratio = nt / nc;

        Vector<float> dDotN = ray.Dx4 * ray.Nx4 + ray.Dy4 * ray.Ny4 + ray.Dz4 * ray.Nz4;
        Vector<float> cos2t = one - ratio * ratio * (one - dDotN * dDotN);

        // fallback for every lane: mirror reflection D - 2 (D.N) N
        Vector<float> twiceDot = 2 * dDotN;
        Vector<float> mirrorX = ray.Dx4 - twiceDot * ray.Nx4;
        Vector<float> mirrorY = ray.Dy4 - twiceDot * ray.Ny4;
        Vector<float> mirrorZ = ray.Dz4 - twiceDot * ray.Nz4;

        Vector<float> draw = RTTools.RandomFloat4(rng);

        // transmission term built from R0 and the fifth power of (1 + D.N)
        Vector<float> a = nt - nc;
        Vector<float> b = nt + nc;
        Vector<float> R0 = a * a / (b * b);
        Vector<float> c = one + dDotN;
        Vector<float> Tr = one - (R0 + (one - R0) * c * c * c * c * c);

        Vector<int> refractMask = Vector.BitwiseAnd(
            Vector.GreaterThanOrEqual(cos2t, Vector<float>.Zero),
            Vector.LessThan(draw, Tr));

        // shared by all three components; NaN where cos2t < 0, but those lanes keep the mirror direction
        Vector<float> normalScale = dDotN * ratio + Vector.SquareRoot(cos2t);

        Rx4 = Vector.ConditionalSelect(refractMask, ray.Dx4 * ratio - ray.Nx4 * normalScale, mirrorX);
        Ry4 = Vector.ConditionalSelect(refractMask, ray.Dy4 * ratio - ray.Ny4 * normalScale, mirrorY);
        Rz4 = Vector.ConditionalSelect(refractMask, ray.Dz4 * ratio - ray.Nz4 * normalScale, mirrorZ);
    }
 /// <summary>
 /// Mirrors each lane's direction about its normal: <c>R = D - 2 (D . N) N</c>.
 /// </summary>
 /// <param name="ray">Ray packet; direction and normal components are read.</param>
 /// <param name="Rx4">Receives the x components of the reflected directions.</param>
 /// <param name="Ry4">Receives the y components of the reflected directions.</param>
 /// <param name="Rz4">Receives the z components of the reflected directions.</param>
 public static void Reflection4(Ray4 ray, ref Vector<float> Rx4, ref Vector<float> Ry4, ref Vector<float> Rz4)
 {
     Vector<float> dDotN = ray.Dx4 * ray.Nx4 + ray.Dy4 * ray.Ny4 + ray.Dz4 * ray.Nz4;
     Vector<float> twiceDot = dDotN * 2;

     Vector<float> alongNx = twiceDot * ray.Nx4;
     Vector<float> alongNy = twiceDot * ray.Ny4;
     Vector<float> alongNz = twiceDot * ray.Nz4;

     Rx4 = ray.Dx4 - alongNx;
     Ry4 = ray.Dy4 - alongNy;
     Rz4 = ray.Dz4 - alongNz;
 }
        /// <summary>
        /// Recursively traces a packet of rays and returns the gathered colour per lane.
        /// </summary>
        /// <remarks>
        /// A lane is finished at this level when it hits nothing (<c>objIdx4 == -1</c>, skydome
        /// colour), hits an emissive material (material colour) or has reached <c>MAXDEPTH</c>
        /// (zero). All other lanes continue along a refracted, reflected or diffuse direction,
        /// chosen by comparing one random draw with <c>material.refr</c> and
        /// <c>material.refr + material.refl</c>. Finished lanes are accumulated in
        /// <c>returnMask</c>; recursion stops once every lane is flagged there.
        /// </remarks>
        /// <param name="rng">Random source shared by all sampling decisions.</param>
        /// <param name="ray">Ray packet to trace; it is handed to <c>Scene.Intersect4</c>.</param>
        /// <param name="depth">Per-lane bounce count.</param>
        /// <returns>Three vectors holding the x, y and z colour components of every lane.</returns>
        private Vector<float>[] Sample4(Random rng, Ray4 ray, Vector<int> depth)
        {
            Vector<float> zero = Vector<float>.Zero;
            Vector<int> allSet = new Vector<int>(-1);

            // every lane already has its final value: nothing left to trace
            if (Vector.EqualsAll(ray.returnMask, allSet))
            {
                return new Vector<float>[] { zero, zero, zero };
            }

            // find nearest ray/scene intersection
            Scene.Intersect4(ray);

            // the skydome is sampled for the whole packet as soon as one lane needs it
            Vector<int> hitSky = Vector.Equals(ray.objIdx4, allSet);
            Vector<float> skyX = zero, skyY = zero, skyZ = zero;
            if (Vector.EqualsAny(ray.objIdx4, allSet))
            {
                scene.SampleSkydome4(ray, ref skyX, ref skyY, ref skyZ);
            }

            // intersection points
            Vector<float> Ix4 = ray.Ox4 + ray.t4 * ray.Dx4;
            Vector<float> Iy4 = ray.Oy4 + ray.t4 * ray.Dy4;
            Vector<float> Iz4 = ray.Oz4 + ray.t4 * ray.Dz4;

            Material4 material = scene.GetMaterial4(ray.objIdx4, Ix4, Iz4);
            Vector<int> hitEmissive = Vector.Equals(material.emissive, Vector<int>.One);
            Vector<int> tooDeep = Vector.GreaterThanOrEqual(depth, new Vector<int>(MAXDEPTH));

            // one draw decides the bounce type: [0, refr) refract, [refr, refr + refl) reflect, rest diffuse
            Vector<float> choice = RTTools.RandomFloat4(rng);
            Vector<float> specularLimit = material.refr + material.refl;
            Vector<int> refractLane = Vector.LessThan(choice, material.refr);
            Vector<int> belowSpecularLimit = Vector.LessThan(choice, specularLimit);
            Vector<int> specularLane = Vector.BitwiseOr(refractLane, belowSpecularLimit);

            Vector<float> refrX = zero, refrY = zero, refrZ = zero;
            Vector<float> reflX = zero, reflY = zero, reflZ = zero;
            Vector<float> diffX = zero, diffY = zero, diffZ = zero;

            // each candidate direction is only computed when at least one lane can use it
            if (Vector.LessThanAny(choice, specularLimit))
            {
                RTTools.Reflection4(ray, ref reflX, ref reflY, ref reflZ);
                RTTools.Refraction4(rng, ray, ref refrX, ref refrY, ref refrZ);
            }
            if (Vector.GreaterThanOrEqualAny(choice, specularLimit))
            {
                RTTools.DiffuseReflection4(rng, ray, ref diffX, ref diffY, ref diffZ);
            }

            Vector<float> Rx4 = Vector.ConditionalSelect(refractLane, refrX,
                Vector.ConditionalSelect(belowSpecularLimit, reflX, diffX));
            Vector<float> Ry4 = Vector.ConditionalSelect(refractLane, refrY,
                Vector.ConditionalSelect(belowSpecularLimit, reflY, diffY));
            Vector<float> Rz4 = Vector.ConditionalSelect(refractLane, refrZ,
                Vector.ConditionalSelect(belowSpecularLimit, reflZ, diffZ));

            Vector<float> NdotR = ray.Nx4 * Rx4 + ray.Ny4 * Ry4 + ray.Nz4 * Rz4;

            // extension ray starts slightly off the surface along the new direction
            Ray4 extensionRay = new Ray4
            {
                Ox4 = Ix4 + Rx4 * EPSILON,
                Oy4 = Iy4 + Ry4 * EPSILON,
                Oz4 = Iz4 + Rz4 * EPSILON,
                Dx4 = Rx4,
                Dy4 = Ry4,
                Dz4 = Rz4,
                t4 = new Vector<float>(1e34f)
            };

            // refracted lanes whose new direction opposes the normal are flagged as inside
            Vector<int> entersObject = Vector.BitwiseAnd(refractLane, Vector.LessThan(NdotR, zero));
            extensionRay.inside4 = Vector.ConditionalSelect(entersObject, Vector<int>.One, extensionRay.inside4);

            // lanes finished at this level keep their depth; the rest go one bounce deeper
            Vector<int> doneHere = Vector.BitwiseOr(hitSky, Vector.BitwiseOr(hitEmissive, tooDeep));
            Vector<int> nextDepth = Vector.ConditionalSelect(doneHere, depth, depth + Vector<int>.One);
            extensionRay.returnMask = Vector.BitwiseOr(ray.returnMask, doneHere);

            Vector<float>[] incoming = Sample4(rng, extensionRay, nextDepth);

            // priority per lane: sky, emissive, depth cut-off, specular bounce, diffuse bounce
            Vector<float> bounceX = Vector.ConditionalSelect(specularLane,
                material.diffusex4 * incoming[0],
                NdotR * material.diffusex4 * incoming[0]);
            Vector<float> bounceY = Vector.ConditionalSelect(specularLane,
                material.diffusey4 * incoming[1],
                NdotR * material.diffusey4 * incoming[1]);
            Vector<float> bounceZ = Vector.ConditionalSelect(specularLane,
                material.diffusez4 * incoming[2],
                NdotR * material.diffusez4 * incoming[2]);

            Vector<float> colorsx4 = Vector.ConditionalSelect(hitSky, skyX,
                Vector.ConditionalSelect(hitEmissive, material.diffusex4,
                Vector.ConditionalSelect(tooDeep, zero, bounceX)));
            Vector<float> colorsy4 = Vector.ConditionalSelect(hitSky, skyY,
                Vector.ConditionalSelect(hitEmissive, material.diffusey4,
                Vector.ConditionalSelect(tooDeep, zero, bounceY)));
            Vector<float> colorsz4 = Vector.ConditionalSelect(hitSky, skyZ,
                Vector.ConditionalSelect(hitEmissive, material.diffusez4,
                Vector.ConditionalSelect(tooDeep, zero, bounceZ)));

            return new Vector<float>[] { colorsx4, colorsy4, colorsz4 };
        }